def _next_id(cursor, table, id_column):
    """Return the largest ``id_column`` value in ``table`` plus one (1 if empty).

    ``table`` and ``id_column`` are only ever internal constants, never
    caller-supplied text, so formatting them into the statement is safe.
    """
    cursor.execute(
        "select `%s` from `%s` order by `%s` desc limit 1"
        % (id_column, table, id_column))
    row = cursor.fetchone()
    # fetchone() yields None when the table has no rows yet.
    return row[0] + 1 if row else 1


def _execute(cursor, sql, params=None):
    """Echo a statement and its bound values to stdout, then execute it."""
    # Single-argument print(...) behaves the same as a print statement here.
    print("%s %r" % (sql, params))
    cursor.execute(sql, params)


def _indexed_rows(owner_id, name_type_pairs):
    """Build ``(owner_id, name, type, position)`` rows, position counted from 0."""
    return [(owner_id, pair[0], pair[1], position)
            for position, pair in enumerate(name_type_pairs)]


def createTable(table_name, column, location, partitionColumn, field_delimit,
                is_parquet_type=False, db_id=1, owner='root'):
    """Register an external table by inserting its metadata rows directly.

    Rows are written, in order, to CDS, SERDES, SERDE_PARAMS, SDS, TBLS,
    TABLE_PARAMS, COLUMNS_V2 and (when partition columns are given)
    PARTITION_KEYS, all inside one transaction that is rolled back if any
    statement fails.

    Args:
        table_name: Name of the table (stored in TBLS.TBL_NAME).
        column: JSON text describing the columns. The code reads element 0 of
            each entry as the column name and element 1 as its type, i.e. an
            array of ``[name, type]`` pairs.
        location: Data storage location (stored in SDS.LOCATION).
        partitionColumn: JSON text with the partition columns, same shape as
            ``column``. ``None``, an empty string or an empty array means the
            table is not partitioned.
        field_delimit: Column delimiter; stored as both the ``field.delim``
            and the ``serialization.format`` SerDe parameter.
        is_parquet_type: When true, the Parquet SerDe and the Parquet
            input/output formats are used instead of the text ones.
        db_id: Value written to TBLS.DB_ID (default 1, the previously
            hard-coded value).
        owner: Value written to TBLS.OWNER (default 'root', the previously
            hard-coded value).

    Raises:
        ValueError: If ``column`` does not describe at least one column.
    """
    columns = json.loads(column)
    if not columns:
        # Fail before touching the database rather than emitting an
        # "insert ... values" statement with no rows.
        raise ValueError("createTable requires at least one column")
    partition_columns = json.loads(partitionColumn) if partitionColumn else []

    # The storage formats must match the SerDe; a Parquet SerDe combined with
    # the text input/output formats produces an unreadable table definition.
    if is_parquet_type:
        serde_lib = 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
        input_format = 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
        output_format = 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
    else:
        serde_lib = 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
        input_format = 'org.apache.hadoop.mapred.TextInputFormat'
        output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'

    create_time = int(time.time())

    conn = MySQLdb.connect(host='xxx', port=xxx, user='xxx', passwd='xxx')
    try:
        conn.select_db('xxx')
        cursor = conn.cursor()
        try:
            # Each id comes from its own table so that it cannot collide with
            # rows already present there.
            # NOTE(review): ids are "current max + 1", so anything else
            # inserting into these tables at the same time could pick the
            # same value - confirm this only runs without concurrent writers.
            tbl_id = _next_id(cursor, 'TBLS', 'TBL_ID')
            sd_id = _next_id(cursor, 'SDS', 'SD_ID')
            cd_id = _next_id(cursor, 'CDS', 'CD_ID')
            serde_id = _next_id(cursor, 'SERDES', 'SERDE_ID')
            print("%s %s %s %s" % (tbl_id, sd_id, cd_id, serde_id))

            _execute(cursor, "insert into CDS(`CD_ID`) values (%s)", (cd_id,))

            _execute(
                cursor,
                "insert into SERDES (`SERDE_ID`,`NAME`,`SLIB`) "
                "VALUES (%s,NULL,%s)",
                (serde_id, serde_lib))

            # NOTE(review): the delimiter is now bound as a value, so it is
            # stored exactly as passed; a caller that pre-escaped it for a SQL
            # string literal would need to pass the raw character instead.
            _execute(
                cursor,
                "insert into SERDE_PARAMS (`SERDE_ID`,`PARAM_KEY`,`PARAM_VALUE`) "
                "values (%s,'field.delim',%s),(%s,'serialization.format',%s)",
                (serde_id, field_delimit, serde_id, field_delimit))

            # IS_COMPRESSED / IS_STOREDASSUBDIRECTORIES keep the original ''
            # literals. NOTE(review): confirm '' is the intended "false"
            # value for those columns.
            _execute(
                cursor,
                "insert into SDS(`SD_ID`,`CD_ID`,`INPUT_FORMAT`,`IS_COMPRESSED`,"
                "`IS_STOREDASSUBDIRECTORIES`,`LOCATION`,`NUM_BUCKETS`,"
                "`OUTPUT_FORMAT`,`SERDE_ID`) "
                "values (%s,%s,%s,'','',%s,-1,%s,%s)",
                (sd_id, cd_id, input_format, location, output_format, serde_id))

            _execute(
                cursor,
                "insert into TBLS(`TBL_ID`,`CREATE_TIME`,`DB_ID`,"
                "`LAST_ACCESS_TIME`,`OWNER`,`RETENTION`,`SD_ID`,`TBL_NAME`,"
                "`TBL_TYPE`,`VIEW_EXPANDED_TEXT`,`VIEW_ORIGINAL_TEXT`) "
                "values (%s,%s,%s,0,%s,0,%s,%s,'EXTERNAL_TABLE',NULL,NULL)",
                (tbl_id, create_time, db_id, owner, sd_id, table_name))

            _execute(
                cursor,
                "insert into TABLE_PARAMS (`TBL_ID`,`PARAM_KEY`,`PARAM_VALUE`) "
                "values (%s,'EXTERNAL','TRUE'),(%s,'transient_lastDdlTime',%s)",
                (tbl_id, tbl_id, str(create_time)))

            column_sql = (
                "insert into COLUMNS_V2 (`CD_ID`,`COMMENT`,`COLUMN_NAME`,"
                "`TYPE_NAME`,`INTEGER_IDX`) values (%s,NULL,%s,%s,%s)")
            column_rows = _indexed_rows(cd_id, columns)
            print("%s %r" % (column_sql, column_rows))
            cursor.executemany(column_sql, column_rows)

            # An unpartitioned table simply has no PARTITION_KEYS rows.
            if partition_columns:
                partition_sql = (
                    "insert into PARTITION_KEYS(`TBL_ID`,`PKEY_COMMENT`,"
                    "`PKEY_NAME`,`PKEY_TYPE`,`INTEGER_IDX`) "
                    "values (%s,NULL,%s,%s,%s)")
                partition_rows = _indexed_rows(tbl_id, partition_columns)
                print("%s %r" % (partition_sql, partition_rows))
                cursor.executemany(partition_sql, partition_rows)

            conn.commit()
        except Exception:
            # Leave nothing half-written if any statement fails.
            conn.rollback()
            raise
        finally:
            cursor.close()
    finally:
        conn.close()