collectAsMap()
Returns a HashMap containing the key-value pairs from all partitions of the RDD. If a key appears more than once, later elements overwrite earlier ones. The Spark source:

/**
 * Return the key-value pairs in this RDD to the master as a Map.
 *
 * Warning: this doesn't return a multimap (so if you have multiple values to the same key, only
 * one value per key is preserved in the map returned)
 *
 * @note this method should only be used if the resulting data is expected to be small, as
 * all the data is loaded into the driver's memory.
 */
def collectAsMap(): Map[K, V] = self.withScope {
  val data = self.collect()
  val map = new mutable.HashMap[K, V]
  map.sizeHint(data.length)
  data.foreach { pair => map.put(pair._1, pair._2) }
  map
}

For comparison, collect() (more commonly used) gathers the RDD's data into an Array on the driver; both operations are suitable only when the data set is small, since everything is loaded into the driver's memory. A runnable example:

package com.dt.spark.main.RDDLearn.RDDCollectAsMap

import org.apache.spark.{SparkContext, SparkConf}

/**
 * Created by hjw on 17/2/4.
 */
object RDDCollectAsMapTest {
  def main(args: Array[String]) {
    val conf = new SparkConf()
    conf.setAppName("test")
    conf.setMaster("local")
    val sc = new SparkContext(conf)

    val rdd = sc.parallelize(List((1, "a"), (1, "b"), (1, "c"), (2, "d"), (2, "e"), (3, "g")))

    // Only the last value for each duplicated key survives.
    val rddMap = rdd.collectAsMap()
    rddMap.foreach(println(_))
    // (2,e)
    // (1,c)
    // (3,g)

    sc.stop()
  }
}
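Since collectAsMap() keeps only one value per key, a natural follow-up is how to keep all of them. A minimal sketch, assuming the same input data as above: grouping with the standard groupByKey() transformation before collecting yields a Map of key to Iterable of values instead. The object name RDDGroupCollectTest is made up for illustration, and the same small-data warning applies, because everything is still loaded into the driver's memory.

import org.apache.spark.{SparkContext, SparkConf}

object RDDGroupCollectTest {
  def main(args: Array[String]) {
    val sc = new SparkContext(new SparkConf().setAppName("group-collect").setMaster("local"))

    val rdd = sc.parallelize(List((1, "a"), (1, "b"), (1, "c"), (2, "d"), (2, "e"), (3, "g")))

    // groupByKey() gathers every value for a key into one Iterable,
    // so collectAsMap() no longer discards duplicates.
    val multiMap = rdd.groupByKey().collectAsMap()
    multiMap.foreach(println(_))
    // prints something like:
    // (2,CompactBuffer(d, e))
    // (1,CompactBuffer(a, b, c))
    // (3,CompactBuffer(g))

    sc.stop()
  }
}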