public class RandomRDDs
extends Object
Constructor and Description |
---|
RandomRDDs() |
Modifier and Type | Method and Description |
---|---|
static JavaDoubleRDD |
normalJavaRDD(JavaSparkContext jsc,
long size)
normalJavaRDD(org.apache.spark.api.java.JavaSparkContext, long, int, long) with the default number of partitions and the default seed. |
static JavaDoubleRDD |
normalJavaRDD(JavaSparkContext jsc,
long size,
int numPartitions)
normalJavaRDD(org.apache.spark.api.java.JavaSparkContext, long, int, long) with the default seed. |
static JavaDoubleRDD |
normalJavaRDD(JavaSparkContext jsc,
long size,
int numPartitions,
long seed)
Java-friendly version of
normalRDD(org.apache.spark.SparkContext, long, int, long) . |
static JavaRDD<Vector> |
normalJavaVectorRDD(JavaSparkContext jsc,
long numRows,
int numCols)
normalJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, long, int, int, long) with the default number of partitions and the default seed. |
static JavaRDD<Vector> |
normalJavaVectorRDD(JavaSparkContext jsc,
long numRows,
int numCols,
int numPartitions)
normalJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, long, int, int, long) with the default seed. |
static JavaRDD<Vector> |
normalJavaVectorRDD(JavaSparkContext jsc,
long numRows,
int numCols,
int numPartitions,
long seed)
Java-friendly version of
normalVectorRDD(org.apache.spark.SparkContext, long, int, int, long) . |
static RDD<Object> |
normalRDD(SparkContext sc,
long size,
int numPartitions,
long seed)
Generates an RDD comprised of i.i.d. samples from the standard normal distribution. |
static RDD<Vector> |
normalVectorRDD(SparkContext sc,
long numRows,
int numCols,
int numPartitions,
long seed)
Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the standard normal distribution. |
static JavaDoubleRDD |
poissonJavaRDD(JavaSparkContext jsc,
double mean,
long size)
poissonJavaRDD(org.apache.spark.api.java.JavaSparkContext, double, long, int, long) with the default number of partitions and the default seed. |
static JavaDoubleRDD |
poissonJavaRDD(JavaSparkContext jsc,
double mean,
long size,
int numPartitions)
poissonJavaRDD(org.apache.spark.api.java.JavaSparkContext, double, long, int, long) with the default seed. |
static JavaDoubleRDD |
poissonJavaRDD(JavaSparkContext jsc,
double mean,
long size,
int numPartitions,
long seed)
Java-friendly version of
poissonRDD(org.apache.spark.SparkContext, double, long, int, long) . |
static JavaRDD<Vector> |
poissonJavaVectorRDD(JavaSparkContext jsc,
double mean,
long numRows,
int numCols)
poissonJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, double, long, int, int, long) with the default number of partitions and the default seed. |
static JavaRDD<Vector> |
poissonJavaVectorRDD(JavaSparkContext jsc,
double mean,
long numRows,
int numCols,
int numPartitions)
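poissonJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, double, long, int, int, long) with the default seed. |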
static JavaRDD<Vector> |
poissonJavaVectorRDD(JavaSparkContext jsc,
double mean,
long numRows,
int numCols,
int numPartitions,
long seed)
Java-friendly version of
poissonVectorRDD(org.apache.spark.SparkContext, double, long, int, int, long) . |
static RDD<Object> |
poissonRDD(SparkContext sc,
double mean,
long size,
int numPartitions,
long seed)
Generates an RDD comprised of i.i.d. samples from the Poisson distribution with the input mean. |
static RDD<Vector> |
poissonVectorRDD(SparkContext sc,
double mean,
long numRows,
int numCols,
int numPartitions,
long seed)
Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the Poisson distribution with the input mean. |
static <T> RDD<T> |
randomRDD(SparkContext sc,
RandomDataGenerator<T> generator,
long size,
int numPartitions,
long seed,
scala.reflect.ClassTag<T> evidence$1)
:: DeveloperApi ::
Generates an RDD comprised of i.i.d. samples produced by the input RandomDataGenerator. |
static RDD<Vector> |
randomVectorRDD(SparkContext sc,
RandomDataGenerator<Object> generator,
long numRows,
int numCols,
int numPartitions,
long seed)
:: DeveloperApi ::
Generates an RDD[Vector] with vectors containing i.i.d. samples produced by the input RandomDataGenerator. |
static JavaDoubleRDD |
uniformJavaRDD(JavaSparkContext jsc,
long size)
uniformJavaRDD(org.apache.spark.api.java.JavaSparkContext, long, int, long) with the default number of partitions and the default seed. |
static JavaDoubleRDD |
uniformJavaRDD(JavaSparkContext jsc,
long size,
int numPartitions)
uniformJavaRDD(org.apache.spark.api.java.JavaSparkContext, long, int, long) with the default seed. |
static JavaDoubleRDD |
uniformJavaRDD(JavaSparkContext jsc,
long size,
int numPartitions,
long seed)
Java-friendly version of
uniformRDD(org.apache.spark.SparkContext, long, int, long) . |
static JavaRDD<Vector> |
uniformJavaVectorRDD(JavaSparkContext jsc,
long numRows,
int numCols)
uniformJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, long, int, int, long) with the default number of partitions and the default seed. |
static JavaRDD<Vector> |
uniformJavaVectorRDD(JavaSparkContext jsc,
long numRows,
int numCols,
int numPartitions)
uniformJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, long, int, int, long) with the default seed. |
static JavaRDD<Vector> |
uniformJavaVectorRDD(JavaSparkContext jsc,
long numRows,
int numCols,
int numPartitions,
long seed)
Java-friendly version of
uniformVectorRDD(org.apache.spark.SparkContext, long, int, int, long) . |
static RDD<Object> |
uniformRDD(SparkContext sc,
long size,
int numPartitions,
long seed)
Generates an RDD comprised of i.i.d. samples from the uniform distribution U(0.0, 1.0). |
static RDD<Vector> |
uniformVectorRDD(SparkContext sc,
long numRows,
int numCols,
int numPartitions,
long seed)
Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the uniform distribution on U(0.0, 1.0). |
public static RDD<Object> uniformRDD(SparkContext sc, long size, int numPartitions, long seed)
Generates an RDD comprised of i.i.d. samples from the uniform distribution U(0.0, 1.0).
To transform the distribution in the generated RDD from U(0.0, 1.0) to U(a, b), use RandomRDDs.uniformRDD(sc, n, p, seed).map(v => a + (b - a) * v).
Parameters:
sc - SparkContext used to create the RDD.
size - Size of the RDD.
numPartitions - Number of partitions in the RDD (default: sc.defaultParallelism).
seed - Random seed (default: a random long integer).
Returns:
RDD comprised of i.i.d. samples ~ U(0.0, 1.0).
public static JavaDoubleRDD uniformJavaRDD(JavaSparkContext jsc, long size, int numPartitions, long seed)
Java-friendly version of uniformRDD(org.apache.spark.SparkContext, long, int, long).
public static JavaDoubleRDD uniformJavaRDD(JavaSparkContext jsc, long size, int numPartitions)
uniformJavaRDD(org.apache.spark.api.java.JavaSparkContext, long, int, long) with the default seed.
public static JavaDoubleRDD uniformJavaRDD(JavaSparkContext jsc, long size)
uniformJavaRDD(org.apache.spark.api.java.JavaSparkContext, long, int, long) with the default number of partitions and the default seed.
public static RDD<Object> normalRDD(SparkContext sc, long size, int numPartitions, long seed)
Generates an RDD comprised of i.i.d. samples from the standard normal distribution.
To transform the distribution in the generated RDD from standard normal to some other normal N(mean, sigma^2), use RandomRDDs.normalRDD(sc, n, p, seed).map(v => mean + sigma * v).
Parameters:
sc - SparkContext used to create the RDD.
size - Size of the RDD.
numPartitions - Number of partitions in the RDD (default: sc.defaultParallelism).
seed - Random seed (default: a random long integer).
public static JavaDoubleRDD normalJavaRDD(JavaSparkContext jsc, long size, int numPartitions, long seed)
Java-friendly version of normalRDD(org.apache.spark.SparkContext, long, int, long).
public static JavaDoubleRDD normalJavaRDD(JavaSparkContext jsc, long size, int numPartitions)
normalJavaRDD(org.apache.spark.api.java.JavaSparkContext, long, int, long) with the default seed.
public static JavaDoubleRDD normalJavaRDD(JavaSparkContext jsc, long size)
normalJavaRDD(org.apache.spark.api.java.JavaSparkContext, long, int, long) with the default number of partitions and the default seed.
public static RDD<Object> poissonRDD(SparkContext sc, double mean, long size, int numPartitions, long seed)
Generates an RDD comprised of i.i.d. samples from the Poisson distribution with the input mean.
Parameters:
sc - SparkContext used to create the RDD.
mean - Mean, or lambda, for the Poisson distribution.
size - Size of the RDD.
numPartitions - Number of partitions in the RDD (default: sc.defaultParallelism).
seed - Random seed (default: a random long integer).
public static JavaDoubleRDD poissonJavaRDD(JavaSparkContext jsc, double mean, long size, int numPartitions, long seed)
Java-friendly version of poissonRDD(org.apache.spark.SparkContext, double, long, int, long).
public static JavaDoubleRDD poissonJavaRDD(JavaSparkContext jsc, double mean, long size, int numPartitions)
poissonJavaRDD(org.apache.spark.api.java.JavaSparkContext, double, long, int, long) with the default seed.
public static JavaDoubleRDD poissonJavaRDD(JavaSparkContext jsc, double mean, long size)
poissonJavaRDD(org.apache.spark.api.java.JavaSparkContext, double, long, int, long) with the default number of partitions and the default seed.
public static <T> RDD<T> randomRDD(SparkContext sc, RandomDataGenerator<T> generator, long size, int numPartitions, long seed, scala.reflect.ClassTag<T> evidence$1)
:: DeveloperApi :: Generates an RDD comprised of i.i.d. samples produced by the input RandomDataGenerator.
Parameters:
sc - SparkContext used to create the RDD.
generator - RandomDataGenerator used to populate the RDD.
size - Size of the RDD.
numPartitions - Number of partitions in the RDD (default: sc.defaultParallelism).
seed - Random seed (default: a random long integer).
public static RDD<Vector> uniformVectorRDD(SparkContext sc, long numRows, int numCols, int numPartitions, long seed)
Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the uniform distribution on U(0.0, 1.0).
Parameters:
sc - SparkContext used to create the RDD.
numRows - Number of Vectors in the RDD.
numCols - Number of elements in each Vector.
numPartitions - Number of partitions in the RDD.
seed - Seed for the RNG that generates the seed for the generator in each partition.
Returns:
RDD[Vector] with vectors containing i.i.d. samples ~ U(0.0, 1.0).
public static JavaRDD<Vector> uniformJavaVectorRDD(JavaSparkContext jsc, long numRows, int numCols, int numPartitions, long seed)
Java-friendly version of uniformVectorRDD(org.apache.spark.SparkContext, long, int, int, long).
public static JavaRDD<Vector> uniformJavaVectorRDD(JavaSparkContext jsc, long numRows, int numCols, int numPartitions)
uniformJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, long, int, int, long) with the default seed.
public static JavaRDD<Vector> uniformJavaVectorRDD(JavaSparkContext jsc, long numRows, int numCols)
uniformJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, long, int, int, long) with the default number of partitions and the default seed.
public static RDD<Vector> normalVectorRDD(SparkContext sc, long numRows, int numCols, int numPartitions, long seed)
Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the standard normal distribution.
Parameters:
sc - SparkContext used to create the RDD.
numRows - Number of Vectors in the RDD.
numCols - Number of elements in each Vector.
numPartitions - Number of partitions in the RDD (default: sc.defaultParallelism).
seed - Random seed (default: a random long integer).
Returns:
RDD[Vector] with vectors containing i.i.d. samples ~ N(0.0, 1.0).
public static JavaRDD<Vector> normalJavaVectorRDD(JavaSparkContext jsc, long numRows, int numCols, int numPartitions, long seed)
Java-friendly version of normalVectorRDD(org.apache.spark.SparkContext, long, int, int, long).
public static JavaRDD<Vector> normalJavaVectorRDD(JavaSparkContext jsc, long numRows, int numCols, int numPartitions)
normalJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, long, int, int, long) with the default seed.
public static JavaRDD<Vector> normalJavaVectorRDD(JavaSparkContext jsc, long numRows, int numCols)
normalJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, long, int, int, long) with the default number of partitions and the default seed.
public static RDD<Vector> poissonVectorRDD(SparkContext sc, double mean, long numRows, int numCols, int numPartitions, long seed)
Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the Poisson distribution with the input mean.
Parameters:
sc - SparkContext used to create the RDD.
mean - Mean, or lambda, for the Poisson distribution.
numRows - Number of Vectors in the RDD.
numCols - Number of elements in each Vector.
numPartitions - Number of partitions in the RDD (default: sc.defaultParallelism).
seed - Random seed (default: a random long integer).
public static JavaRDD<Vector> poissonJavaVectorRDD(JavaSparkContext jsc, double mean, long numRows, int numCols, int numPartitions, long seed)
Java-friendly version of poissonVectorRDD(org.apache.spark.SparkContext, double, long, int, int, long).
public static JavaRDD<Vector> poissonJavaVectorRDD(JavaSparkContext jsc, double mean, long numRows, int numCols, int numPartitions)
poissonJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, double, long, int, int, long) with the default seed.
public static JavaRDD<Vector> poissonJavaVectorRDD(JavaSparkContext jsc, double mean, long numRows, int numCols)
poissonJavaVectorRDD(org.apache.spark.api.java.JavaSparkContext, double, long, int, int, long) with the default number of partitions and the default seed.
public static RDD<Vector> randomVectorRDD(SparkContext sc, RandomDataGenerator<Object> generator, long numRows, int numCols, int numPartitions, long seed)
:: DeveloperApi :: Generates an RDD[Vector] with vectors containing i.i.d. samples produced by the input RandomDataGenerator.
Parameters:
sc - SparkContext used to create the RDD.
generator - RandomDataGenerator used to populate the RDD.
numRows - Number of Vectors in the RDD.
numCols - Number of elements in each Vector.
numPartitions - Number of partitions in the RDD (default: sc.defaultParallelism).
seed - Random seed (default: a random long integer).