PySpark 中将 list 转换为 DataFrame

通过pandas

通过pandas来做转换

import pandas as pd
from pyspark.sql import SparkSession

# Raw input: one [user_name, user_id] row per user.
data_list = [['zhang', 12], ['wang', 9], ['li', 3], ['zhao', 6]]

# Step 1: build an intermediate pandas DataFrame with named columns.
data_df = pd.DataFrame(data_list, columns=['user_name', 'user_id'])

# Step 2: hand the pandas DataFrame to Spark; column names are taken from the
# pandas frame. `data_df` is rebound to the resulting Spark DataFrame.
spark = SparkSession.builder.enableHiveSupport().getOrCreate()
data_df = spark.createDataFrame(data_df)

直接用pyspark

from pyspark.sql import SparkSession
from pyspark.sql.types import IntegerType, StringType, StructType, StructField

# Raw input: one (user_name, user_id) tuple per user.
data_list = [('zhang', 12), ('wang', 9), ('li', 3), ('zhao', 6)]

# Explicit schema (column name, type, nullable) for the two columns.
schema = StructType([
    StructField("user_name", StringType(), True),
    StructField("user_id", IntegerType(), True),
])

# Build the Spark DataFrame directly from the list using the schema above.
spark = SparkSession.builder.enableHiveSupport().getOrCreate()
data_df = spark.createDataFrame(data_list, schema)

你可能感兴趣的:(pyspark,spark)