方法1:使用requests同步
from requests import Session
import base64
class LoadSession(Session):
    """Session whose redirects keep the Authorization header."""

    def rebuild_auth(self, prepared_request, response):
        """
        Intentionally do nothing.

        Overriding this hook with an empty body means requests will
        always preserve the Authorization header when redirected.
        """
        return None
def main():
    """
    Stream Load demo using the third-party ``requests`` library.

    Sends a small in-memory CSV payload with an HTTP PUT to the
    ``_stream_load`` endpoint of table ``starrocks_demo.tb1`` on
    ``master1:8030`` and prints the JSON body of the response.
    """
    username, password = 'root', ''
    headers = {
        "Content-Type": "text/html; charset=UTF-8",
        # "Content-Type": "application/octet-stream",  # file upload
        "connection": "keep-alive",
        "max_filter_ratio": "0.2",
        "columns": "k,v",
        "column_separator": ',',
        "Expect": "100-continue",
    }
    # Encode explicitly so the bytes on the wire match the charset declared
    # in Content-Type; a text body would otherwise be encoded by the HTTP
    # layer (ISO-8859-1), which breaks on non-Latin-1 data.
    payload = '''k1,v1\nk2,v2\nk3,v3'''.encode('utf-8')
    database = 'starrocks_demo'
    tablename = 'tb1'
    api = 'http://master1:8030/api/%s/%s/_stream_load' % (database, tablename)
    # The context manager closes the session (and its pooled connections)
    # even if the request or the JSON decoding raises.
    with LoadSession() as session:
        session.auth = (username, password)
        response = session.put(url=api, headers=headers, data=payload)
        # File upload alternative:
        # response = session.put(url=api, headers=headers, data=open("a.csv", "rb"))
        print(response.json())
# Run the requests-based demo only when executed as a script.
if __name__ == "__main__":
    main()
方法2:数据保存成文件再执行终端命令同步
import subprocess
import time
class StarRocksClient(object):
    """Run a StarRocks Stream Load by invoking the ``curl`` command.

    Each instance holds the connection settings and the description of one
    load job (source file, target table, column mapping, separator and
    timeout).
    """

    def __init__(self, host, port, database, columns, sep,
                 username, password, filename, table, timeout):
        """Store the load parameters.

        host, port         -- address used to build the ``_stream_load`` URL
        database, table    -- target of the load; also used in the label
        columns            -- value of the ``columns`` HTTP header
        sep                -- value of the ``column_separator`` HTTP header
        username, password -- credentials handed to curl via ``-u``
        filename           -- path handed to curl via ``-T``
        timeout            -- integer value of the ``timeout`` HTTP header
        """
        self.filename = filename
        self.table = table
        self.columns = columns
        self.sep = sep
        self.host = host
        self.port = port
        self.database = database
        self.user = username
        self.password = password
        self.timeout = timeout

    def get_label(self):
        """Return a label of the form ``<database>_<table>_<timestamp>``.

        The timestamp is the current ``time.time()`` value with the decimal
        point replaced by an underscore, so every call yields a new label.
        """
        timestamp = str(time.time()).replace(".", "_")
        return '_'.join([self.database, self.table, timestamp])

    def _build_command(self, label):
        """Return the curl argument list for one load using ``label``."""
        url = "http://%s:%s/api/%s/%s/_stream_load" % (
            self.host, self.port, self.database, self.table
        )
        # NOTE: the credentials end up on curl's command line.
        return [
            "curl", "--location-trusted",
            "-H", "columns: %s" % self.columns,
            "-H", "column_separator: %s" % self.sep,
            "-H", "label: %s" % label,
            "-H", "timeout: %d" % self.timeout,
            "-u", "%s:%s" % (self.user, self.password),
            "-T", "%s" % self.filename,
            url,
        ]

    def load(self):
        """Run the load with curl, print the outcome and return the label.

        Success or failure is decided from curl's exit status only.
        NOTE(review): the exit status presumably reflects the transfer, not
        the load result reported in the response body -- confirm before
        relying on the printed message.
        """
        # Generate the label once: get_label() is time based, so calling it
        # again would send a different label than the one reported here.
        label = self.get_label()
        returncode = subprocess.call(self._build_command(label))
        if returncode != 0:
            print("""\nLoad to starrocks failed! LABEL is %s""" % (label))
        else:
            print("""\nLoad to starrocks success! LABEL is %s """ % (label))
        return label
if __name__ == "__main__":
    # -- Stream load Demo with Linux cmd - Curl
    # --
    # -- StarRocks DDL:
    # CREATE TABLE `starrocks_demo`.`tb1` (
    # `k` varchar(65533) NULL COMMENT "",
    # `v` varchar(65533) NULL COMMENT ""
    # ) ENGINE=OLAP
    # DUPLICATE KEY(`k`)
    # COMMENT "OLAP"
    # DISTRIBUTED BY HASH(`k`) BUCKETS 1
    # PROPERTIES (
    # "replication_num" = "1",
    # "in_memory" = "false",
    # "storage_format" = "DEFAULT"
    # );

    # Settings shared by both load jobs; only the data source differs.
    shared_settings = dict(
        host="master1",
        port="8030",
        database="starrocks_demo",
        username="root",
        password="",
        table="tb1",
        columns='k,v',
        sep=",",
        timeout=86400,
    )

    # load job 1
    # data from local file /tmp/test.csv, usage: python CurlStreamLoad.py
    client1 = StarRocksClient(filename="/tmp/test.csv", **shared_settings)
    client1.load()

    time.sleep(1)

    # load job 2
    # data from stdin, usage: echo 'k1,v1\nk2,v2'| python CurlStreamLoad.py
    client2 = StarRocksClient(filename="-", **shared_settings)
    client2.load()