# df is a DataFrame
def lowerCase(string):
    """Return ``string`` stripped of surrounding whitespace and lower-cased.

    ``None`` is passed through unchanged so the function can be applied to
    nullable values (for example when wrapped as a UDF over a column that
    contains nulls) without raising ``AttributeError``.

    Args:
        string: The text to normalise, or ``None``.

    Returns:
        The stripped, lower-cased text, or ``None`` when ``string`` is
        ``None``.
    """
    if string is None:
        return None
    return string.strip().lower()
# Wrap the normaliser as a UDF whose declared return type is a string.
lowerCaseUDF = udf(lowerCase, StringType())

# Replace every string-typed column with its normalised version; columns of
# any other type are left untouched.
for columnName, kind in df.dtypes:
    if kind != "string":
        continue
    df = df.withColumn(columnName, lowerCaseUDF(df[columnName]))

# Display the distinct values of one column after normalisation.
df.select("Tipo_unidad").distinct().show()
def recent_six_months(clrq, now=None):
    """Classify a ``YYYY-MM-DD`` date string as inside or outside six months.

    "Six months" is approximated as 6 * 30 = 180 days counted back from the
    reference date. Dates later than the reference date yield a negative day
    difference and are therefore also reported as ``'in-6'``.

    Args:
        clrq: Date string in ``%Y-%m-%d`` format. Anything that cannot be
            parsed (wrong format, non-string, ``None``) is treated as
            outside the window.
        now: Optional ``datetime.datetime`` reference date. Defaults to
            2018-05-01, the fixed reference date this function has always
            used.

    Returns:
        ``'in-6'`` when ``clrq`` is at most 180 days before ``now``,
        otherwise ``'not-in-6'``.
    """
    if now is None:
        now = datetime.datetime(2018, 5, 1)

    # Keep the try body to the single call that can fail: ValueError for a
    # malformed date string, TypeError for a non-string such as None.
    try:
        clrq_date_time = datetime.datetime.strptime(clrq, '%Y-%m-%d')
    except (TypeError, ValueError):
        return 'not-in-6'

    elapsed_days = (now - clrq_date_time).days
    # Integer comparison avoids any dependence on how `/` divides integers.
    return 'in-6' if elapsed_days <= 6 * 30 else 'not-in-6'
# def is_valid_date(str):
#     '''Check whether the argument is a valid date string.'''
#     try:
#         time.strptime(str, "%Y-%m-%d")
#         return True
#     except:
#         return False
# Reference: Stack Overflow link