Python 通过 CSV 向 ClickHouse 插入数据

第一步:连接clickhouse

第二步:读取CSV

第三步:转换成与clickhouse相匹配的数据

第四步:插入,3万行耗时2.3s

from clickhouse_driver import Client
import types 
import time,datetime
from datetime import date

import csv


# Connection to the ClickHouse server that every later statement goes through.
client = Client(host='192.168.1.231', database='test6', user='default', password='')

# DDL for the demo table. The column order must match the column order of the
# CSV file, because the INSERT later in the script does not name its columns.
#
# The engine clause uses the explicit PARTITION BY / ORDER BY form. The legacy
# positional form MergeTree(date_column, (key, ...), granularity) is deprecated
# in ClickHouse and disabled by default on newer server releases. Monthly
# partitioning on bill_date and index_granularity = 8192 express the same
# layout the legacy arguments did.
creattable = """CREATE TABLE test1 (
    consumption_id UInt64,
    member_id UInt64,
    fans_id UInt64,
    bill_date Date,
    money Float32,
    people_num UInt8,
    dish_name String,
    created_org UInt8,
    open_id String,
    subscribed_time DateTime,
    unsubscribed_time DateTime,
    source_type UInt8,
    sns_type UInt8,
    is_subscribed UInt8
) ENGINE = MergeTree
PARTITION BY toYYYYMM(bill_date)
ORDER BY (consumption_id, created_org)
SETTINGS index_granularity = 8192"""

# Drop any previous copy so the script can be re-run from a clean state.
client.execute('DROP TABLE IF EXISTS test1')
def _convert_csv_row(fields):
    """Convert one CSV record in place to the Python types of the test1 columns.

    csv.reader yields every field as a string; the INSERT is executed with
    types_check=True, so each value has to be converted first.

    Column positions (same order as the CREATE TABLE statement):
      0-2         UInt64 ids                -> int
      3           bill_date 'Y/M/D'         -> datetime.date
      4           money                     -> float
      5, 7, 11-13 UInt8 columns             -> int
      9, 10       'Y/M/D H:M' timestamps    -> datetime.datetime
      6, 8        String columns            -> left untouched

    Returns the same list object that was passed in.
    Raises ValueError / IndexError when a field is malformed or missing.
    """
    for idx in (0, 1, 2, 5, 7, 11, 12, 13):
        fields[idx] = int(fields[idx])
    year, month, day = fields[3].split('/')
    fields[3] = date(int(year), int(month), int(day))
    fields[4] = float(fields[4])
    for idx in (9, 10):
        fields[idx] = datetime.datetime.strptime(fields[idx], '%Y/%m/%d %H:%M')
    return fields


# Rows ready for insertion, one list per CSV record.
data = []
# newline='' is what the csv module requires of the file object so that line
# breaks inside quoted fields are parsed correctly.
with open(r'C:\Users\Administrator\Desktop\test.csv', newline='') as csvfile:
    readCSV = csv.reader(csvfile, delimiter=',')
    for record in readCSV:
        if not record:
            # csv.reader yields [] for blank lines (e.g. a trailing newline).
            continue
        # `row` is kept as a module-level name: the debug output further down
        # the script inspects the last converted row through it.
        row = _convert_csv_row(record)
        data.append(row)


# Create the table, bulk-insert the converted rows and print the row count.
# Any failure is reported on stdout instead of aborting with a traceback.
try:
    if data:
        # Debug output: the first converted row, then the Python type of every
        # field of the last converted row. Skipped when the CSV had no rows.
        print(data[0])
        print(type(time.time()))
        print(*(type(value) for value in data[-1]))
    client.execute(creattable)
    if data:
        # types_check=True makes the driver validate the Python types against
        # the column types before sending.
        client.execute('INSERT INTO test1  VALUES', data, types_check=True)
    print(client.execute('select count(1) from test1'))
except Exception as e:
    print(e)


——————————————————————————————————————————————————

下面是用 Python 读取 MySQL 表数据并存为 CSV,再插入到 ClickHouse 的示例:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
from clickhouse_driver import Client
import types 
import re
import time,datetime

from datetime import date
import pymysql
import warnings
import csv


# Silence all Python warnings for the rest of the script.
warnings.filterwarnings('ignore')

# Source MySQL database and a cursor on it.
pos1 = pymysql.connect(host='192.168.1.235', port=3306, user='root', password='123456', db='0001790455_pos', charset="utf8")
pos = pos1.cursor()

# Dump the first 1000 rows of `bigtable` to a CSV file. Errors are printed
# rather than raised; the cursor and connection are always released.
try:
    pos.execute("select *from bigtable limit 1000")
    readcsv = pos.fetchall()
    # Raw string: the Windows path contains backslashes that must not be read
    # as escape sequences. newline='' lets the csv module control line endings.
    with open(r'D:\python\csvtest.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(readcsv)
except Exception as e:
    print(e)
finally:
    pos.close()
    pos1.close()



# Connection to the ClickHouse server that every later statement goes through.
client = Client(host='192.168.1.231', database='test6', user='default', password='')

# DDL for the demo table. The column order must match the column order of the
# CSV file, because the INSERT later in the script does not name its columns.
#
# The engine clause uses the explicit PARTITION BY / ORDER BY form. The legacy
# positional form MergeTree(date_column, (key, ...), granularity) is deprecated
# in ClickHouse and disabled by default on newer server releases. Monthly
# partitioning on bill_date and index_granularity = 8192 express the same
# layout the legacy arguments did.
creattable = """CREATE TABLE test1 (
    consumption_id UInt64,
    member_id UInt64,
    fans_id UInt64,
    bill_date Date,
    money Float32,
    people_num UInt8,
    dish_name String,
    created_org UInt8,
    open_id String,
    subscribed_time DateTime,
    unsubscribed_time DateTime,
    source_type UInt8,
    sns_type UInt8,
    is_subscribed UInt8
) ENGINE = MergeTree
PARTITION BY toYYYYMM(bill_date)
ORDER BY (consumption_id, created_org)
SETTINGS index_granularity = 8192"""

# Drop any previous copy so the script can be re-run from a clean state.
client.execute('DROP TABLE IF EXISTS test1')
# Compiled once instead of on every row: matches a run of decimal digits.
_DIGIT_RUN = re.compile(r"\d+")


def _parse_timestamp(text):
    """Build a datetime from the first six digit runs found in *text*.

    The runs are read as year, month, day, hour, minute, second, whatever the
    separators are. Anything after the sixth run (such as a fractional-seconds
    part) is ignored.

    Raises ValueError when *text* holds fewer than six digit runs.
    """
    parts = [int(piece) for piece in _DIGIT_RUN.findall(text)[:6]]
    if len(parts) < 6:
        raise ValueError('cannot parse timestamp: %r' % (text,))
    return datetime.datetime(*parts)


def _convert_mysql_csv_row(fields):
    """Convert one CSV record in place to the Python types of the test1 columns.

    csv.reader yields every field as a string; the INSERT is executed with
    types_check=True, so each value has to be converted first.

    Column positions (same order as the CREATE TABLE statement):
      0-2         UInt64 ids            -> int
      3           bill_date 'Y-M-D'     -> datetime.date
      4           money                 -> float
      5, 7, 11-13 UInt8 columns         -> int
      9, 10       timestamps            -> datetime.datetime
      6, 8        String columns        -> left untouched

    Returns the same list object that was passed in.
    """
    for idx in (0, 1, 2, 5, 7, 11, 12, 13):
        fields[idx] = int(fields[idx])
    year, month, day = fields[3].split('-')
    fields[3] = date(int(year), int(month), int(day))
    fields[4] = float(fields[4])
    for idx in (9, 10):
        fields[idx] = _parse_timestamp(fields[idx])
    return fields


# Rows ready for insertion, one list per CSV record.
data = []
# Timer start; it is read again after the insert to report elapsed time.
start = time.time()

# newline='' is what the csv module requires of the file object so that line
# breaks inside quoted fields are parsed correctly.
with open(r'D:\python\csvtest.csv', newline='') as csvfile:
    readCSV = csv.reader(csvfile, delimiter=',')
    for record in readCSV:
        if not record:
            # csv.reader yields [] for blank lines (e.g. a trailing newline).
            continue
        row = _convert_mysql_csv_row(record)
        data.append(row)


# Create the table, bulk-insert the converted rows and print the elapsed time.
# `start` is taken before the CSV file is parsed, so the printed figure covers
# parsing, table creation and the insert together.
# Any failure is printed instead of propagating.
try:
    client.execute(creattable)
    client.execute(
        'INSERT INTO test1  VALUES',
        data,
        types_check=True,
    )
    end = time.time()
    elapsed = end - start
    print('clickhouse插入时间', elapsed)
except Exception as err:
    print(err)

同样地,要注意:CSV 中的数据读入 Python 后都是字符串,插入前需要先转换成与 ClickHouse 列类型对应的 Python 类型。


你可能感兴趣的:(clickhouse)