“DataType”, “NullType”, “StringType”, “BinaryType”, “BooleanType”, “DateType”,
“TimestampType”, “DecimalType”, “DoubleType”, “FloatType”, “ByteType”, “IntegerType”,
“LongType”, “ShortType”, “ArrayType”, “MapType”, “StructField”, “StructType”
class StructField(DataType):
"""A field in :class:`StructType`.
:param name: string, name of the field.
:param dataType: :class:`DataType` of the field.
:param nullable: boolean, whether the field can be null (None) or not.
:param metadata: a dict from string to simple type that can be toInternald to JSON automatically
"""
def __init__(self, name, dataType, nullable=True, metadata=None):
"""
>>> (StructField("f1", StringType(), True)
... == StructField("f1", StringType(), True))
True
>>> (StructField("f1", StringType(), True)
... == StructField("f2", StringType(), True))
False
"""
assert isinstance(dataType, DataType), "dataType should be DataType"
assert isinstance(name, basestring), "field name should be string"
if not isinstance(name, str):
name = name.encode('utf-8')
self.name = name
self.dataType = dataType
self.nullable = nullable
self.metadata = metadata or {}
指定说明每个DataFrame的数据类型。
val schema = StructType(
List(
StructField("id", IntegerType, true),
StructField("name", StringType, true),
StructField("age", IntegerType, true)
)
)
//将RDD映射到rowRDD
val rowRDD = personRDD.map(p => Row(p(0).toInt, p(1).trim, p(2).toInt))
//将schema信息应用到rowRDD上
val personDataFrame = sqlContext.createDataFrame(rowRDD, schema)
参考: