>>> from pyspark.sql.types import *
>>> from pyspark.sql.functions import array
>>> tag=array(lit("oracle"),lit("java")
>>> df2.withColumn("tags",tag).show()
结果:
+------+--------------+----------+-----+----+----+--------------+
|gender| ethinicity|first_name|count|rank|year| tags|
+------+--------------+----------+-----+----+----+--------------+
| MALE| HISPANIC| JAYDEN| 364| 1|2012|[oracle, java]|
| MALE|WHITE NON HISP| JOSEPH| 300| 2|2012|[oracle, java]|
| MALE|WHITE NON HISP| JOSEPH| 300| 2|2012|[oracle, java]|
| MALE| HISPANIC| JACOB| 293| 4|2012|[oracle, java]|
| MALE| HISPANIC| JACOB| 293| 4|2012|[oracle, java]|
| MALE|WHITE NON HISP| DAVID| 289| 6|2012|[oracle, java]|
| MALE|WHITE NON HISP| DAVID| 289| 6|2012|[oracle, java]|
| MALE| HISPANIC| MATTHEW| 279| 8|2012|[oracle, java]|
| MALE| HISPANIC| MATTHEW| 279| 8|2012|[oracle, java]|
| MALE| HISPANIC| ETHAN| 254| 10|2012|[oracle, java]|
| MALE| HISPANIC| ETHAN| 254| 10|2012|[oracle, java]|
| MALE|WHITE NON HISP| MICHAEL| 245| 12|2012|[oracle, java]|
| MALE|WHITE NON HISP| MICHAEL| 245| 12|2012|[oracle, java]|
| MALE|WHITE NON HISP| JACOB| 242| 14|2012|[oracle, java]|
| MALE|WHITE NON HISP| JACOB| 242| 14|2012|[oracle, java]|
| MALE|WHITE NON HISP| MOSHE| 238| 16|2012|[oracle, java]|
| MALE|WHITE NON HISP| MOSHE| 238| 16|2012|[oracle, java]|
| MALE| HISPANIC| ANGEL| 236| 18|2012|[oracle, java]|
| MALE| HISPANIC| AIDEN| 235| 19|2012|[oracle, java]|
| MALE|WHITE NON HISP| DANIEL| 232| 20|2012|[oracle, java]|
+------+--------------+----------+-----+----+----+--------------+
only showing top 20 rows
>>> arr=["oracle","java"]
>>> mp=[ (lambda x:lit(x))(x) for x in arr ]
>>> df.withColumn("mk",array(mp)).show()
+------+---+----------+----------+--------------+
| name|age| role|experience| mk|
+------+---+----------+----------+--------------+
| John| 25| Developer| 2.56|[oracle, java]|
| Scott| 30| Tester| 5.2|[oracle, java]|
| Jim| 28| DBA| 3.0|[oracle, java]|
| Mike| 35|Consultant| 10.0|[oracle, java]|
|Daniel| 26| Developer| 3.2|[oracle, java]|
| Paul| 29| Tester| 3.6|[oracle, java]|
| Peter| 30| Developer| 6.5|[oracle, java]|
+------+---+----------+----------+--------------+
【参考】https://stackoverflow.com/questions/59532087/pyspark-equivalent-of-adding-a-constant-array-to-a-dataframe-as-column