map(Row(_))错误,需要用Row.fromSeq(_)

 

 val rddRow = rdd.map(Row(_))
 spark.createDataFrame(rddRow,schema)

Caused by: java.lang.RuntimeException: Error while encoding: java.lang.RuntimeException: [Ljava.lang.String; is not a valid external type for schema of string
if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 0, _c0), StringType), true) AS _c0#0
+- if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 0, _c0), StringType), true)
   :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt
   :  :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object)
   :  :  +- input[0, org.apache.spark.sql.Row, true]
   :  +- 0
   :- null

需要用

Row.fromSeq(_)
val rddRow = rdd.map(Row.fromSeq(_))
spark.createDataFrame(rddRow,schema)

 

你可能感兴趣的:(spark)