工作中需要实现一个查询并导出数据的接口,考虑过 SpringBoot + Spark 和 SpringBoot + Impala 两种方案。Spark 只测试了本地模式,由于时间有限,暂时没有测试 yarn 模式;而且 Spark 更适合做数据分析,用 Impala 做查询则比较高效,因此最终选择以 Impala 作为查询引擎。这里记录一下整合 Impala 的过程。因为项目中还有其他处理模块,所以只把 Impala 部分拆分了出来,但它是一个完整的独立模块。
目录
层级关系
package com.sm
import org.springframework.boot.SpringApplication
import org.springframework.boot.autoconfigure.{EnableAutoConfiguration, SpringBootApplication}
import org.springframework.context.annotation.ComponentScan
/**
 * Spring Boot entry point.
 *
 * `@SpringBootApplication` is the only annotation kept: it already enables
 * auto-configuration and component scanning of this package, so the explicit
 * `@EnableAutoConfiguration` / `@ComponentScan` pair was redundant.
 *
 * Created by LiuJinHe, 2019/9/23.
 */
@SpringBootApplication
class CrowdPackageApp

/**
 * Launcher for [[CrowdPackageApp]].
 *
 * Uses an explicit `main` instead of the `App` trait to avoid the trait's
 * delayed-initialization pitfalls, and forwards the command-line arguments so
 * that options such as `--server.port=...` reach Spring Boot.
 */
object CrowdPackageApp {
  def main(args: Array[String]): Unit = {
    SpringApplication.run(classOf[CrowdPackageApp], args: _*)
  }
}
package com.sm.controller
import com.sm.service.CrowdService
import com.sm.service.impl.CrowdServiceImpl
import org.springframework.beans.factory.annotation.Autowired
import org.springframework.web.bind.annotation.{CrossOrigin, RequestMapping, RequestMethod, RequestParam, ResponseBody, RestController}
/**
 * HTTP request mappings for the crowd-package export.
 *
 * The [[CrowdService]] collaborator is supplied through constructor injection.
 *
 * Created by LiuJinHe, 2019/9/23.
 */
@RestController
@CrossOrigin
class CrowdController @Autowired()(crowdService: CrowdService) {

  /**
   * Handles `GET /crowd`: binds the query-string parameters and delegates to
   * `crowdService.reqCrowd`, returning its message unchanged as the response body.
   *
   * @param channelId  value of the `channel_id` request parameter
   * @param cpGameIds  value of the `cp_game_ids` request parameter
   * @param actionType value of the `action_type` request parameter
   * @param os         value of the `os` request parameter
   * @param beginTime  value of the `begin_time` request parameter
   * @param endTime    value of the `end_time` request parameter
   * @return the message produced by the service
   */
  @RequestMapping(value = Array("/crowd"), method = Array(RequestMethod.GET))
  @ResponseBody
  def exportCrowd(
      @RequestParam("channel_id") channelId: Int,
      @RequestParam("cp_game_ids") cpGameIds: String,
      @RequestParam("action_type") actionType: Int,
      @RequestParam("os") os: Int,
      @RequestParam("begin_time") beginTime: String,
      @RequestParam("end_time") endTime: String
  ): String =
    crowdService.reqCrowd(
      channelId = channelId,
      cpGameIds = cpGameIds,
      actionType = actionType,
      os = os,
      beginTime = beginTime,
      endTime = endTime
    )
}
首先是 Trait,方便以后业务扩展。
package com.sm.service
/**
 * Business-level abstraction for crowd-package requests.
 *
 * Created by LiuJinHe, 2019/9/24.
 */
trait CrowdService {

  /**
   * Processes one crowd-package request.
   *
   * @param channelId  channel identifier supplied by the caller
   * @param cpGameIds  game identifiers, passed as a single string
   * @param actionType numeric selector for the kind of request
   * @param os         numeric operating-system selector
   * @param beginTime  start of the requested period
   * @param endTime    end of the requested period
   * @return a message describing the outcome
   */
  def reqCrowd(
      channelId: Int,
      cpGameIds: String,
      actionType: Int,
      os: Int,
      beginTime: String,
      endTime: String
  ): String
}
实现类
有其他业务代码,这里只拆分了Impala的。
package com.sm.service.impl
import scala.io.Source
/**
 * [[CrowdService]] implementation that turns a request into Impala SQL and
 * hands it to `ImpalaConnect.queryWithImpala`.
 *
 * Request handling keeps all of its working state in local values. The bean is
 * registered once under the name `crowdService`, so state held in fields would
 * be shared between concurrent requests.
 *
 * Created by LiuJinHe, 2019/9/24.
 */
@Service("crowdService")
class CrowdServiceImpl extends CrowdService {
  private val logger = LoggerFactory.getLogger(classOf[CrowdServiceImpl])

  // Comma-separated list whose items are integers or single-quoted word tokens.
  private val idListPattern =
    """\s*(?:\d+|'\w+')(?:\s*,\s*(?:\d+|'\w+'))*\s*""".r.pattern

  // Digits plus the usual date/time separators; quotes and semicolons are not accepted.
  private val dateValuePattern = """[0-9][0-9T:/.\- ]*""".r.pattern

  // Legacy public fields, kept so existing readers keep compiling. They only
  // mirror the most recent values; request handling never reads them back.
  var statSql: String = _
  var sqlStr: String = _
  var result = new Result[String]
  var dataType: String = _

  /**
   * Validates the request, builds the matching SQL and runs the export.
   *
   * The query is executed only when parameter checking succeeded AND a complete
   * statement could be built. An unknown `actionType`, an unsupported `os` for
   * the payment query, or a value that is unsafe to splice into SQL all return
   * an error message without touching Impala.
   *
   * @return the message from `ParamsUtils.checkParams` when it reports failure,
   *         an error message when no query could be built, otherwise the message
   *         returned by `ImpalaConnect.queryWithImpala`
   */
  override def reqCrowd(channelId: Int, cpGameIds: String, actionType: Int, os: Int, beginTime: String, endTime: String): String = {
    val checked: Result[String] = ParamsUtils.checkParams(channelId, cpGameIds, actionType, os, beginTime, endTime)
    result = checked

    if (checked.getStatusCode == Result.FAILED) checked.getMsg
    else
      planQuery(cpGameIds, actionType, os, beginTime, endTime) match {
        case Left(reason) =>
          logger.warn(reason)
          result = new Result[String](Result.FAILED, reason)
          reason

        case Right((refreshSql, idType, querySql)) =>
          statSql = refreshSql
          dataType = idType
          logger.debug("执行SQL:" + querySql)
          val queryInfo = ImpalaConnect.queryWithImpala(refreshSql, querySql)
          checked.setMsg(queryInfo)
          logger.info("查询并导出结果成功")
          queryInfo
      }
  }

  /**
   * Builds the payment query.
   *
   * `os` 1 appends a filter excluding package ids whose last two digits are
   * `99`; `os` 2 appends the opposite filter. For any other value no filter is
   * appended and the legacy `result` field is set to a failed result, as before.
   * NOTE(review): which platform each `os` value stands for is not visible here.
   *
   * The arguments are spliced into the statement verbatim, so callers must pass
   * values that are already known to be safe (as `reqCrowd` does).
   *
   * @return the SQL text, which is also mirrored into `sqlStr`
   */
  def paymentAnalysis(cpGameIds: String, os: Int, beginTime: String, endTime: String): String = {
    logger.info("构建付费 SQL 命令")
    val baseSql =
      s"""
         |select distinct tab.device_id from cp_data.tb_sdk_user_payment tab,cp_data.tb_base_game_conf conf where tab.game_id = conf.game_id
         | and tab.`date` between '$beginTime' and '$endTime'
         | and conf.cp_game_id in ($cpGameIds)
         |""".stripMargin

    val sql = paymentOsFilter(os) match {
      case Some(filter) => baseSql + filter
      case None =>
        result = new Result[String](Result.FAILED, "构建sql时发现os类型错误")
        baseSql
    }
    sqlStr = sql
    sql
  }

  /**
   * Builds the activation query.
   *
   * The arguments are spliced into the statement verbatim, so callers must pass
   * values that are already known to be safe (as `reqCrowd` does).
   *
   * @return the SQL text, which is also mirrored into `sqlStr`
   */
  def activeAnalysis(cpGameIds: String, os: Int, beginTime: String, endTime: String): String = {
    logger.info("构建激活 SQL 命令")
    val sql =
      s"""
         |select distinct tab.device_id from cp_data.tb_sdk_active_log tab,cp_data.tb_base_game_conf conf where tab.game_id = conf.game_id
         | and tab.`date` between '$beginTime' and '$endTime'
         | and tab.os = '$os'
         | and conf.cp_game_id in ($cpGameIds)
         |""".stripMargin
    sqlStr = sql
    sql
  }

  /**
   * Decides what to run for a request: `Right((refreshSql, idType, querySql))`
   * when a complete query exists, `Left(errorMessage)` otherwise.
   */
  private def planQuery(cpGameIds: String, actionType: Int, os: Int, beginTime: String, endTime: String): Either[String, (String, String, String)] =
    if (!matchesFully(idListPattern, cpGameIds)) Left("cp_game_ids 参数格式非法")
    else if (!matchesFully(dateValuePattern, beginTime) || !matchesFully(dateValuePattern, endTime))
      Left("begin_time/end_time 参数格式非法")
    else
      actionType match {
        case 1 =>
          Right(("refresh cp_data.tb_sdk_active_log", "IMEI", activeAnalysis(cpGameIds, os, beginTime, endTime)))
        case 2 =>
          // Checked up front so an incomplete payment statement is never executed.
          if (paymentOsFilter(os).isDefined)
            Right(("refresh cp_data.tb_sdk_user_payment", "IDFA", paymentAnalysis(cpGameIds, os, beginTime, endTime)))
          else Left("构建sql时发现os类型错误")
        case _ =>
          Left("错误参数,参数必须为active or payment!")
      }

  /** Trailing `package_id` filter of the payment query, if `os` is supported. */
  private def paymentOsFilter(os: Int): Option[String] = os match {
    case 1 => Some("and right(cast(tab.package_id as string),2) != '99';")
    case 2 => Some("and right(cast(tab.package_id as string),2) = '99';")
    case _ => None
  }

  /** True when `value` is non-null and matches `pattern` in its entirety. */
  private def matchesFully(pattern: java.util.regex.Pattern, value: String): Boolean =
    value != null && pattern.matcher(value).matches()
}
package com.sm.execute
import java.io.{File, FileWriter}
import java.sql.{Connection, DriverManager, PreparedStatement, ResultSet, SQLException}
import com.sm.constants.Constants
import org.slf4j.{Logger, LoggerFactory}
/**
 * JDBC access to Impala.
 *
 * Created by LiuJinHe, 2019/9/24.
 */
object ImpalaConnect {
  private val logger: Logger = LoggerFactory.getLogger(ImpalaConnect.getClass)

  /**
   * Runs `statSql` (the table refresh), then `sqlStr`, and writes the first
   * column of every returned row to the file at `Constants.IMPALA_OUT_PATH`,
   * one value per line.
   *
   * @param statSql statement executed before the query; skipped when null or blank
   * @param sqlStr  query whose first column is exported
   * @return "查询成功!" when the export completed, "查询失败!" when any step failed
   *         (the failure itself is logged)
   */
  def queryWithImpala(statSql: String, sqlStr: String): String = {
    var conn: Connection = null
    var refreshStat: PreparedStatement = null
    var queryStat: PreparedStatement = null
    var rows: ResultSet = null
    var writer: java.io.BufferedWriter = null

    println("Impala开始执行sql查询")
    try {
      // Opened first, as before, so the output file is truncated even if the query fails.
      writer = new java.io.BufferedWriter(new FileWriter(new File(Constants.IMPALA_OUT_PATH)))

      Class.forName(Constants.IMPALA_JDBC)
      conn = DriverManager.getConnection(Constants.IMPALA_URL, Constants.IMPALA_USER, Constants.IMPALA_PASSWORD)

      // The refresh statement is the one that must run here; execute() is used
      // because it is not expected to produce a result set.
      if (statSql != null && statSql.trim.nonEmpty) {
        refreshStat = conn.prepareStatement(statSql)
        refreshStat.execute()
        println("刷新表成功")
      }

      queryStat = conn.prepareStatement(sqlStr)
      rows = queryStat.executeQuery()

      var count = 0
      while (rows.next()) {
        writer.write(rows.getString(1) + "\n")
        count += 1
      }
      // One flush after the loop; the buffered writer batches the per-row writes.
      writer.flush()
      println("导出结果成功,总条数: " + count)
      "查询成功!"
    } catch {
      case scala.util.control.NonFatal(e) =>
        logger.error("jdbc查询impala失败", e)
        "查询失败!"
    } finally {
      disConnect(conn, rows, queryStat)
      closeQuietly(refreshStat, "jdbc连接关闭失败")
      closeQuietly(writer, "导出文件关闭失败")
    }
  }

  /**
   * Closes the result set, the statement and the connection, in that order.
   * Each one is closed independently, so a failure on one does not prevent the
   * others from being closed. Null arguments are ignored.
   */
  def disConnect(connection: Connection, rs: ResultSet, ps: PreparedStatement): Unit =
    Seq[AutoCloseable](rs, ps, connection).foreach(closeQuietly(_, "jdbc连接关闭失败"))

  /** Closes `resource` if it is non-null, logging instead of propagating a failure. */
  private def closeQuietly(resource: AutoCloseable, failureMessage: String): Unit =
    if (resource != null) {
      try resource.close()
      catch {
        case scala.util.control.NonFatal(e) => logger.warn(failureMessage, e)
      }
    }
}
package com.sm.config
import org.springframework.context.annotation.Configuration
import org.springframework.web.servlet.config.annotation.{CorsRegistry, WebMvcConfigurer}
/**
 * Web MVC configuration that registers the CORS rules.
 *
 * Created by LiuJinHe, 2019/9/24.
 */
@Configuration
class CrowdConfig extends WebMvcConfigurer {

  /**
   * Registers one CORS mapping covering every path: any origin, credentials
   * allowed, the five listed HTTP methods, and a max age of 3600.
   *
   * NOTE(review): newer Spring versions are reported to reject a wildcard origin
   * combined with allowCredentials(true) - confirm before upgrading Spring.
   */
  override def addCorsMappings(registry: CorsRegistry): Unit = {
    val everyPath = registry.addMapping("/**")
    everyPath.allowedOrigins("*")
    everyPath.allowCredentials(true)
    everyPath.allowedMethods("GET", "POST", "PUT", "DELETE", "OPTIONS")
    everyPath.maxAge(3600)
  }
}
package com.sm.common
import java.io.Serializable
import scala.beans.BeanProperty
/**
 * Request result: status codes, default messages and factory methods.
 *
 * Created by LiuJinHe, 2019/9/23.
 */
@SerialVersionUID(1L)
object Result {
  // Status codes.
  val SUCCESS: Int = 1
  val FAILED: Int = 0

  // Messages used when the caller does not supply one.
  val DEFAULT_SUCCESS_MESSAGE: String = "success"
  val DEFAULT_FAILED_MESSAGE: String = "failed"

  // Factories for successful results.
  def SuccessResult[T]: Result[T] = new Result[T](SUCCESS, DEFAULT_SUCCESS_MESSAGE)

  def SuccessResult[T](msg: String): Result[T] = new Result[T](SUCCESS, msg)

  def SuccessResult[T](data: T): Result[T] = new Result[T](SUCCESS, DEFAULT_SUCCESS_MESSAGE, data)

  def SuccessResult[T](msg: String, data: T): Result[T] = new Result[T](SUCCESS, msg, data)

  // Factories for failed results.
  def FailedResult[T]: Result[T] = new Result[T](FAILED, DEFAULT_FAILED_MESSAGE)

  def FailedResult[T](msg: String): Result[T] = new Result[T](FAILED, msg)

  def FailedResult[T](data: T): Result[T] = new Result[T](FAILED, DEFAULT_FAILED_MESSAGE, data)

  def FailedResult[T](msg: String, data: T): Result[T] = new Result[T](FAILED, msg, data)
}

/**
 * Mutable, serializable carrier for a status code, a message and an optional
 * payload. `@BeanProperty` generates JavaBean-style getters and setters for
 * each field. A result built with the no-argument constructor is a success
 * carrying the default success message.
 *
 * @tparam T type of the payload
 */
@SerialVersionUID(1L)
class Result[T] extends Serializable {
  @BeanProperty var statusCode: Int = Result.SUCCESS
  @BeanProperty var msg: String = Result.DEFAULT_SUCCESS_MESSAGE
  @BeanProperty var data: T = _

  /** Creates a result with the given status code and message and no payload. */
  def this(statusCode: Int, msg: String) = {
    this()
    this.statusCode = statusCode
    this.msg = msg
  }

  /** Creates a result with the given status code, message and payload. */
  def this(statusCode: Int, msg: String, data: T) = {
    this(statusCode, msg)
    this.data = data
  }
}
package com.sm.constants
import java.io.InputStream
import java.util.Properties
/**
 * Impala settings read once from `conf.properties` on the classpath when this
 * object is first used.
 *
 * Loading is best-effort: if the file is missing or cannot be read, the problem
 * is reported on stderr and the settings stay `null`. A missing file used to
 * abort initialization with a NullPointerException raised while closing the
 * never-opened stream.
 *
 * Created by LiuJinHe, 2019/9/24.
 */
object Constants {
  var IMPALA_JDBC: String = _
  var IMPALA_URL: String = _
  var IMPALA_USER: String = _
  var IMPALA_PASSWORD: String = _
  var IMPALA_OUT_PATH: String = _
  var in: InputStream = _

  try {
    // getResourceAsStream returns null, rather than throwing, when the resource is absent.
    in = Constants.getClass.getClassLoader.getResourceAsStream("conf.properties")
    if (in == null) {
      System.err.println("conf.properties not found on the classpath; Impala settings are left unset")
    } else {
      val prop: Properties = new Properties()
      prop.load(in)
      IMPALA_JDBC = prop.getProperty("impala.jdbc")
      IMPALA_URL = prop.getProperty("impala.url")
      IMPALA_USER = prop.getProperty("impala.user")
      IMPALA_PASSWORD = prop.getProperty("impala.password")
      IMPALA_OUT_PATH = prop.getProperty("impala.out.path")
    }
  } catch {
    case e: Exception =>
      e.printStackTrace()
  } finally {
    // Guarded: the stream is null when the resource was not found.
    if (in != null) {
      try in.close()
      catch {
        case e: Exception => e.printStackTrace()
      }
    }
  }
}
常用配置其实可以放到 application.properties/yml 中,在启动 SpringBoot 时加载;不过这里还有一些其他配置,就直接放到同一个配置文件(conf.properties)里一起加载。
impala.jdbc = com.cloudera.impala.jdbc41.Driver
impala.url = jdbc:impala://xxx:21050
impala.user = hive
impala.password = hive
impala.out.path = E:\\testdata\\impala_out_result.txt
这里就作为初始设置,按需求可以放入其他需要启动加载的配置,比如可以把Hadoop、Zookeeper、Redis、Kafka等配置添加启动加载。
server.port = 8081
server.max-http-header-size = 10000000
server.use-forward-headers = true
server.tomcat.remote-ip-header = X-Real-IP
server.tomcat.protocol-header = X-Forwarded-Proto
spring.servlet.multipart.maxFileSize = -1
spring.servlet.multipart.maxRequestSize = -1
yml类型配置
server:
port: 8081
max-http-header-size: 1000000
tomcat:
protocol-header: X-Forwarded-Proto
remote-ip-header: X-Real-IP
spring:
application:
name: crowd-package-server
servlet:
multipart:
enabled: true
max-request-size: -1
max-file-size: -1
UTF-8
1.7.28
2.12.1
2.11.12
org.springframework.boot
spring-boot-starter-parent
2.1.7.RELEASE
org.slf4j
slf4j-nop
${slf4j.version}
org.scala-lang
scala-library
${scala.version}
com.cloudera.impala
impalajdbc41
2.6.15
com.google.code.gson
gson
2.8.5
org.springframework.boot
spring-boot-starter
org.springframework.boot
spring-boot-starter-logging
org.springframework.boot
spring-boot-starter-aop
org.springframework.boot
spring-boot-starter-web
org.springframework.boot
spring-boot-configuration-processor
true
mysql
mysql-connector-java
5.1.47
com.google.protobuf
protobuf-java
2.6.1
commons-codec
commons-codec
1.13
net.alchim31.maven
scala-maven-plugin
3.2.2
scala-compile-first
process-resources
add-source
compile
org.springframework.boot
spring-boot-maven-plugin
true
打包上传
java -jar crowd-package-server-1.0.jar
PostMan测试: