第一步:连接clickhouse
第二步:读取CSV
第三步:把CSV读出的字符串逐列转换成与clickhouse列类型相匹配的Python数据类型
第四步:插入,3万行耗时2.3s
from clickhouse_driver import Client
import types
import time,datetime
from datetime import date
import csv
# Connect to the ClickHouse server that will receive the CSV rows.
client = Client(host='192.168.1.231', database='test6', user='default', password='')

# DDL for the target table.
# The engine clause uses the current MergeTree syntax. It is the documented
# equivalent of the legacy form
#     MergeTree(bill_date, (consumption_id, created_org), 8192)
# (monthly partitions on the date column, same sorting key, same index
# granularity). The legacy form is deprecated and disabled by default on
# recent ClickHouse servers, where it makes CREATE TABLE fail.
creattable = """CREATE TABLE test1 (
    consumption_id UInt64,
    member_id UInt64,
    fans_id UInt64,
    bill_date Date,
    money Float32,
    people_num UInt8,
    dish_name String,
    created_org UInt8,
    open_id String,
    subscribed_time DateTime,
    unsubscribed_time DateTime,
    source_type UInt8,
    sns_type UInt8,
    is_subscribed UInt8
) ENGINE = MergeTree()
PARTITION BY toYYYYMM(bill_date)
ORDER BY (consumption_id, created_org)
SETTINGS index_granularity = 8192"""

# Drop any table left over from a previous run so the script is re-runnable.
client.execute('DROP TABLE IF EXISTS test1')
# Rows read from the CSV, converted in place to Python types that match the
# columns of table test1 (same positional order as the CREATE TABLE).
data = []

# newline='' hands line-ending handling to the csv module, so quoted fields
# that contain embedded newlines are parsed correctly.
with open(r'C:\Users\Administrator\Desktop\test.csv', newline='') as csvfile:
    readCSV = csv.reader(csvfile, delimiter=',')
    for row in readCSV:
        # Integer columns: consumption_id, member_id, fans_id, people_num,
        # created_org, source_type, sns_type, is_subscribed.
        for idx in (0, 1, 2, 5, 7, 11, 12, 13):
            row[idx] = int(row[idx])

        # bill_date is written as year/month/day.
        year, month, day = row[3].split('/')[:3]
        row[3] = date(int(year), int(month), int(day))

        # money -> float for the Float32 column.
        row[4] = float(row[4])

        # subscribed_time (9) and unsubscribed_time (10), minute precision.
        for idx in (9, 10):
            row[idx] = datetime.datetime.strptime(row[idx], '%Y/%m/%d %H:%M')

        # Columns 6 (dish_name) and 8 (open_id) stay as strings.
        data.append(row)
# Create the table and bulk-insert the converted rows.
# The original also opened 'test.csv' (a different, relative path) into an
# unused reader here. That leaked a file handle and, when the file was absent,
# raised inside this try so the CREATE/INSERT below never ran. It is removed.
try:
    # Debug output: first converted row and the Python type of each of the
    # 14 columns of the last row read.
    print(data[0])
    print(type(time.time()))
    print(*(type(row[idx]) for idx in range(14)))

    client.execute(creattable)
    # types_check=True asks the driver to validate the values against the
    # column types while inserting.
    client.execute('INSERT INTO test1 VALUES', data, types_check=True)
    print(client.execute('select count(1) from test1'))
except Exception as e:
    # Best-effort demo script: report the failure and carry on.
    print(e)
——————————————————————————————————————————————————
下面是用Python读取MySQL表数据并存为csv,再把该csv插入到clickhouse的示例:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from clickhouse_driver import Client
import types
import re
import time,datetime
from datetime import date
import pymysql
import warnings
import csv
warnings.filterwarnings('ignore')
# Export the first 1000 rows of MySQL table `bigtable` to a CSV file.
pos1 = pymysql.connect(host='192.168.1.235', port=3306, user='root', password='123456', db='0001790455_pos', charset="utf8")
pos = pos1.cursor()
try:
    pos.execute("select *from bigtable limit 1000")
    mysql_rows = pos.fetchall()
    # Raw string: the previous non-raw literal relied on the invalid escape
    # sequences "\p" and "\c" being passed through unchanged.
    # newline='' is what the csv module expects for files it writes.
    with open(r'D:\python\csvtest.csv', 'w', newline='') as csvfile:
        csv.writer(csvfile).writerows(mysql_rows)
except Exception as e:
    # Best-effort demo script: report the failure and carry on.
    print(e)
finally:
    # Release the cursor and the connection whether or not the export worked.
    pos.close()
    pos1.close()
# Connect to the ClickHouse server that will receive the exported rows.
client = Client(host='192.168.1.231', database='test6', user='default', password='')

# DDL for the target table.
# The engine clause uses the current MergeTree syntax. It is the documented
# equivalent of the legacy form
#     MergeTree(bill_date, (consumption_id, created_org), 8192)
# (monthly partitions on the date column, same sorting key, same index
# granularity). The legacy form is deprecated and disabled by default on
# recent ClickHouse servers, where it makes CREATE TABLE fail.
creattable = """CREATE TABLE test1 (
    consumption_id UInt64,
    member_id UInt64,
    fans_id UInt64,
    bill_date Date,
    money Float32,
    people_num UInt8,
    dish_name String,
    created_org UInt8,
    open_id String,
    subscribed_time DateTime,
    unsubscribed_time DateTime,
    source_type UInt8,
    sns_type UInt8,
    is_subscribed UInt8
) ENGINE = MergeTree()
PARTITION BY toYYYYMM(bill_date)
ORDER BY (consumption_id, created_org)
SETTINGS index_granularity = 8192"""

# Drop any table left over from a previous run so the script is re-runnable.
client.execute('DROP TABLE IF EXISTS test1')
# Rows read back from the exported CSV, converted in place to Python types
# that match the columns of table test1 (same positional order).
data = []
# Timer start: the elapsed time reported after the insert therefore covers
# CSV parsing and conversion as well as the insert itself.
start = time.time()

# Compiled once instead of being looked up for every datetime cell.
number_pattern = re.compile(r"\d+\.?\d*")

# newline='' matches how the file was written and lets the csv module handle
# line endings (including newlines embedded in quoted fields) itself.
with open(r'D:\python\csvtest.csv', newline='') as csvfile:
    readCSV = csv.reader(csvfile, delimiter=',')
    for row in readCSV:
        # Integer columns: consumption_id, member_id, fans_id, people_num,
        # created_org, source_type, sns_type, is_subscribed.
        for idx in (0, 1, 2, 5, 7, 11, 12, 13):
            row[idx] = int(row[idx])

        # bill_date is written as year-month-day.
        year, month, day = row[3].split('-')[:3]
        row[3] = date(int(year), int(month), int(day))

        # money -> float for the Float32 column.
        row[4] = float(row[4])

        # unsubscribed_time (10) and subscribed_time (9): take the first six
        # numeric groups as year, month, day, hour, minute, second.
        for idx in (10, 9):
            yr, mon, dy, hr, mnt, sec = number_pattern.findall(row[idx])[:6]
            row[idx] = datetime.datetime(int(yr), int(mon), int(dy), int(hr), int(mnt), int(sec))

        # Columns 6 (dish_name) and 8 (open_id) stay as strings.
        data.append(row)
# Create the table, bulk-insert the converted rows and report the elapsed
# time measured from `start`.
try:
    client.execute(creattable)
    client.execute('INSERT INTO test1 VALUES', data, types_check=True)
    end = time.time()
    elapsed = end - start
    print('clickhouse插入时间', elapsed)
except Exception as err:
    # Best-effort demo script: report the failure and carry on.
    print(err)
同样要注意:csv读入Python后每个字段都是字符串,插入前必须先转换成与clickhouse列类型对应的Python类型(int、float、date、datetime)