public class HadoopRDD<K,V> extends RDD<scala.Tuple2<K,V>> implements Logging
An RDD that provides core functionality for reading data stored in Hadoop (e.g., files in HDFS, sources in HBase, or S3), using the older MapReduce API (org.apache.hadoop.mapred).
Note: Instantiating this class directly is not recommended; please use
org.apache.spark.SparkContext.hadoopRDD() instead.
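For illustration, here is a minimal sketch of that recommended route through the Java API, where JavaSparkContext.hadoopRDD performs the construction; the app name, master, and input path are hypothetical:

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.FileInputFormat;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.TextInputFormat;
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaPairRDD;
    import org.apache.spark.api.java.JavaSparkContext;

    public class HadoopRDDExample {
      public static void main(String[] args) {
        JavaSparkContext jsc = new JavaSparkContext(
            new SparkConf().setAppName("HadoopRDDExample").setMaster("local[2]"));

        // Describe the input through the old mapred API's JobConf.
        JobConf jobConf = new JobConf();
        FileInputFormat.setInputPaths(jobConf, "hdfs:///tmp/input");  // hypothetical path

        // Recommended entry point; it constructs the HadoopRDD internally.
        JavaPairRDD<LongWritable, Text> lines =
            jsc.hadoopRDD(jobConf, TextInputFormat.class,
                LongWritable.class, Text.class, 2);

        System.out.println(lines.count());
        jsc.stop();
      }
    }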
Constructor Summary

HadoopRDD(SparkContext sc,
          Broadcast<SerializableWritable<org.apache.hadoop.conf.Configuration>> broadcastedConf,
          scala.Option<scala.Function1<org.apache.hadoop.mapred.JobConf,scala.runtime.BoxedUnit>> initLocalJobConfFuncOpt,
          Class<? extends org.apache.hadoop.mapred.InputFormat<K,V>> inputFormatClass,
          Class<K> keyClass,
          Class<V> valueClass,
          int minPartitions)

HadoopRDD(SparkContext sc,
          org.apache.hadoop.mapred.JobConf conf,
          Class<? extends org.apache.hadoop.mapred.InputFormat<K,V>> inputFormatClass,
          Class<K> keyClass,
          Class<V> valueClass,
          int minPartitions)
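Should direct construction be needed anyway, a sketch of the second constructor, reusing the hypothetical jsc and jobConf from the example above (jsc.sc() unwraps the underlying Scala SparkContext):

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.TextInputFormat;
    import org.apache.spark.rdd.HadoopRDD;

    // Direct construction (discouraged; prefer SparkContext.hadoopRDD()).
    HadoopRDD<LongWritable, Text> rdd = new HadoopRDD<>(
        jsc.sc(), jobConf, TextInputFormat.class,
        LongWritable.class, Text.class, 2);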
Method Summary

static void addLocalConfiguration(String jobTrackerId, int jobId, int splitId, int attemptId, org.apache.hadoop.mapred.JobConf conf)
    Add Hadoop configuration specific to a single partition and attempt.

void checkpoint()
    Mark this RDD for checkpointing.

InterruptibleIterator<scala.Tuple2<K,V>> compute(Partition theSplit, TaskContext context)
    :: DeveloperApi ::
    Implemented by subclasses to compute a given partition.

static boolean containsCachedMetadata(String key)

static Object getCachedMetadata(String key)
    This method, containsCachedMetadata, and putCachedMetadata are helpers for accessing the local map, a property of the SparkEnv of the local process.

org.apache.hadoop.conf.Configuration getConf()

Partition[] getPartitions()
    Implemented by subclasses to return the set of partitions in this RDD.

scala.collection.Seq<String> getPreferredLocations(Partition split)
    Optionally overridden by subclasses to specify placement preferences.

static Object putCachedMetadata(String key, Object value)
Methods inherited from class org.apache.spark.rdd.RDD:
aggregate, cache, cartesian, checkpointData, coalesce, collect, collect, context, count, countApprox, countApproxDistinct, countByValue, countByValueApprox, creationSiteInfo, dependencies, distinct, distinct, filter, filterWith, first, flatMap, flatMapWith, fold, foreach, foreachPartition, foreachWith, getCheckpointFile, getStorageLevel, glom, groupBy, groupBy, groupBy, id, intersection, intersection, intersection, isCheckpointed, iterator, keyBy, map, mapPartitions, mapPartitionsWithContext, mapPartitionsWithIndex, mapPartitionsWithSplit, mapWith, max, min, name, partitioner, partitions, persist, persist, pipe, pipe, pipe, preferredLocations, randomSplit, reduce, repartition, sample, saveAsObjectFile, saveAsTextFile, saveAsTextFile, setName, sparkContext, subtract, subtract, subtract, take, takeOrdered, takeSample, toArray, toDebugString, toJavaRDD, toLocalIterator, top, toString, union, unpersist, zip, zipPartitions, zipPartitions, zipPartitions, zipPartitions, zipPartitions, zipPartitions, zipWithIndex, zipWithUniqueId
Methods inherited from interface org.apache.spark.Logging:
initialized, initializeIfNecessary, initializeLogging, initLock, isTraceEnabled, log_, log, logDebug, logDebug, logError, logError, logInfo, logInfo, logTrace, logTrace, logWarning, logWarning
Constructor Detail

public HadoopRDD(SparkContext sc, Broadcast<SerializableWritable<org.apache.hadoop.conf.Configuration>> broadcastedConf, scala.Option<scala.Function1<org.apache.hadoop.mapred.JobConf,scala.runtime.BoxedUnit>> initLocalJobConfFuncOpt, Class<? extends org.apache.hadoop.mapred.InputFormat<K,V>> inputFormatClass, Class<K> keyClass, Class<V> valueClass, int minPartitions)

public HadoopRDD(SparkContext sc, org.apache.hadoop.mapred.JobConf conf, Class<? extends org.apache.hadoop.mapred.InputFormat<K,V>> inputFormatClass, Class<K> keyClass, Class<V> valueClass, int minPartitions)

Method Detail

public static Object getCachedMetadata(String key)

public static boolean containsCachedMetadata(String key)

public static Object putCachedMetadata(String key, Object value)

public static void addLocalConfiguration(String jobTrackerId, int jobId, int splitId, int attemptId, org.apache.hadoop.mapred.JobConf conf)
public Partition[] getPartitions()
    Overrides: getPartitions in class RDD<scala.Tuple2<K,V>>

public InterruptibleIterator<scala.Tuple2<K,V>> compute(Partition theSplit, TaskContext context)
    Overrides: compute in class RDD<scala.Tuple2<K,V>>

public scala.collection.Seq<String> getPreferredLocations(Partition split)
    Overrides: getPreferredLocations in class RDD<scala.Tuple2<K,V>>
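Since getPartitions() and getPreferredLocations(split) are public on HadoopRDD, input splits and their locality can be inspected directly. A sketch continuing the first example; the unchecked cast assumes lines wraps the HadoopRDD that JavaSparkContext.hadoopRDD constructed:

    import org.apache.spark.Partition;
    import org.apache.spark.rdd.HadoopRDD;

    // `lines` is the JavaPairRDD<LongWritable, Text> from the first sketch.
    HadoopRDD<LongWritable, Text> hadoopRdd =
        (HadoopRDD<LongWritable, Text>) lines.rdd();

    for (Partition split : hadoopRdd.getPartitions()) {
      // Preferred locations are the hosts holding each underlying input split.
      System.out.println("partition " + split.index() + " -> "
          + hadoopRdd.getPreferredLocations(split));
    }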
public void checkpoint()
    Overrides: checkpoint in class RDD<scala.Tuple2<K,V>>
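Note that in Spark's implementation this override is a no-op, since a HadoopRDD can always be recomputed from its input files (an assumption worth verifying for your Spark version); checkpointing is therefore usually applied to RDDs derived from it. A sketch continuing the first example, with a hypothetical checkpoint directory:

    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.function.Function;
    import scala.Tuple2;

    jsc.setCheckpointDir("hdfs:///tmp/checkpoints");  // hypothetical directory

    // Derive an RDD from the HadoopRDD-backed pair RDD and checkpoint it.
    JavaRDD<String> values = lines.map(
        new Function<Tuple2<LongWritable, Text>, String>() {
          public String call(Tuple2<LongWritable, Text> pair) {
            return pair._2().toString();
          }
        });
    values.checkpoint();  // only marks the RDD for checkpointing
    values.count();       // triggers a job, which materializes the checkpoint
    System.out.println(values.isCheckpointed());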
public org.apache.hadoop.conf.Configuration getConf()
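getConf() exposes the Hadoop Configuration backing this RDD (resolved from the broadcast configuration when the broadcast-based constructor was used). A sketch continuing the locality example above; the property name is an assumption from the old mapred API, worth verifying for your Hadoop version:

    import org.apache.hadoop.conf.Configuration;

    Configuration hadoopConf = hadoopRdd.getConf();
    // "mapred.input.dir" is the property FileInputFormat.setInputPaths sets
    // in the old mapred API (assumption).
    System.out.println(hadoopConf.get("mapred.input.dir"));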