spark中DataFrame读取hive之UDF函数去掉空行

 
  
 
   
 
     
 
     
import org.apache.spark.sql.SparkSession
// Build (or reuse) a SparkSession bound to the standalone cluster master,
// with Hive support enabled so Hive tables are visible to spark.sql(...).
val spark = SparkSession
  .builder()
  .master("spark://192.168.0.0:7077")
  .enableHiveSupport()
  .getOrCreate()

// Convenience handles used by the rest of the script.
val sc = spark.sparkContext
val sqlContext = spark.sqlContext
// Define the UDF helper.
// Returns true when the phone value is usable (non-null and non-empty),
// false otherwise. Used below to filter out blank src/dst phone numbers.
//
// Fixes two defects in the original:
//  - Hive columns can hold NULL, which Spark passes to the UDF as null;
//    the original `phone.length` would throw a NullPointerException.
//  - `phone.length == 0 || phone.isEmpty == true` was redundant
//    (both tests are the same check) and `== true` is an anti-idiom.
def stringLength(phone: String): Boolean = {
  phone != null && phone.nonEmpty
}
// Make the helper visible to SQL text: register it under the name
// "stringLength" so it can be called inside spark.sql(...) queries.
sqlContext.udf.register("stringLength", (p: String) => stringLength(p))
// Pull (src, dst) phone-number pairs from the Hive `cdr` table for one day,
// keeping only answered calls (answerdur>0.3) whose numbers are non-empty
// according to the registered stringLength UDF.
// NOTE(review): `src`/`dst`/`day` are interpolated directly into the SQL
// text, so they must come from trusted code, never from user input.
def acceptData(src: String, dst: String, day: String) = {
  spark.sql(s"""select $src as src  , $dst as dst from cdr where day='$day' and answerdur>0.3 and stringLength($src) and stringLength($dst)""")
}

// Calls for the 2017-11-27 partition, mapping callingnum/callednum to src/dst.
val directMerge = acceptData("callingnum", "callednum", "20171127")
// Keep just the two phone-number columns and persist them to Hive.
val accept = directMerge.select("src", "dst")

// saveAsTable with mode("overwrite") creates default.dayresult if it does
// not exist and replaces it otherwise, so no manual CREATE TABLE is needed.
accept.write.format("parquet").mode("overwrite").saveAsTable("default.dayresult")
 
     



 
     



你可能感兴趣的:(DataFrame)