#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
from datetime import datetime, time, timedelta
import h5py
import numpy as np
import pandas as pd
from dateutil import parser
from pymongo import MongoClient
import config
from util import date_range
# Candle periods accepted by `candle`.  Only one-minute bars are enabled; the
# remaining entries are kept for reference but are currently switched off.
PERIODS = {
    '1min',
    # '3min',
    # '5min',
    # '15min',
    # '30min',
    # '1day',
    # '3day',
    # '1week',
    # '1hour',
    # '2hour',
    # '4hour',
    # '6hour',
    # '12hour',
}

# Folder under the configured data directory that `candle` reads bars from.
MIN_CANDLE_FOLDER = os.path.join(config.data_dir, 'bar')

# Folder under the configured data directory that `tick` reads ticks from.
TICK_FOLDER = os.path.join(config.data_dir, 'tick')
def hist_symbol(date):
    """Fetch the historical symbol table stored for one day.

    :param str date: the day to look up, in any format ``dateutil`` can parse
    :returns: the ``symbols`` value of the matching document (presumably a
        list), or None when no document matches or it has no ``symbols``
        field
    :rtype: list or None
    """
    with MongoClient(config.mongo_read_uri) as client:
        collection = client.master.hist_symbols
        query = {"date": parser.parse(date)}
        # Fetch only the ``symbols`` field; ``_id`` is excluded explicitly.
        record = collection.find_one(
            query, projection={"symbols": 1, "_id": 0})

    if not record or 'symbols' not in record:
        return None
    return record['symbols']
def candle(symbol, period, begin, end):
    """Load candlestick (K-line) bars for one symbol from its HDF5 file.

    The file ``<MIN_CANDLE_FOLDER>/<exchange>/<name>.h5`` is expected to hold
    one group per calendar day, each with ``timestamps``, ``prices`` and
    ``volumes`` datasets.  Days missing from the file are skipped silently.

    :param str symbol: instrument code in ``exchange/name`` form,
        e.g. ``okex/btc.usdt``
    :param str period: bar period; must be a member of ``PERIODS``
    :param str begin: start time (inclusive), parsed with ``dateutil``
    :param str end: end time (inclusive), parsed with ``dateutil``
    :returns: frame indexed by ``datetime`` (naive UTC, derived from epoch
        seconds) with columns open, high, low, close, volume; may be empty
        if the loaded days contain no bar inside ``[begin, end]``
    :rtype: pandas.DataFrame
    :raises KeyError: if ``period`` is not supported
    :raises ValueError: if the symbol's file does not exist, or it holds no
        data for any day in the requested range
    """
    begin_dt = parser.parse(begin)
    end_dt = parser.parse(end)

    if period not in PERIODS:
        # Build the message explicitly: exception constructors do not apply
        # logging-style lazy %-formatting to extra positional arguments.
        raise KeyError(
            'argument wrong: period should be in [{}], given value {}'.format(
                ','.join(sorted(PERIODS)), period))

    exchange, sym = symbol.split('/')
    h5filepath = os.path.join(MIN_CANDLE_FOLDER, exchange, sym + '.h5')
    if not os.path.isfile(h5filepath):
        raise ValueError('file not existed: ' + h5filepath)

    timestamp_cache = []
    price_cache = []
    volume_cache = []
    with h5py.File(h5filepath, 'r') as min_fs:
        for dt in date_range(begin_dt.date(), end_dt.date()):
            date_str = str(dt)
            if date_str not in min_fs:
                continue
            day_group = min_fs[date_str]
            timestamp_cache.append(day_group['timestamps'][...])
            price_cache.append(day_group['prices'][...])
            volume_cache.append(day_group['volumes'][...])

    # np.concatenate rejects an empty sequence with an unrelated message, so
    # report the "nothing found" case before stacking the per-day arrays.
    if not timestamp_cache:
        raise ValueError('empty data since {} until {}'.format(
            begin_dt, end_dt))

    # Timestamps and volumes are reshaped into single columns so they can be
    # stacked next to the price matrix.
    # NOTE(review): assumes ``prices`` has four columns (open, high, low,
    # close) to match the column names below -- confirm against the writer.
    timestamps = np.reshape(np.concatenate(timestamp_cache), (-1, 1))
    prices = np.concatenate(price_cache)
    volumes = np.reshape(np.concatenate(volume_cache), (-1, 1))
    cache = np.concatenate((timestamps, prices, volumes), axis=1)
    if len(cache) == 0:
        raise ValueError('empty data since {} until {}'.format(
            begin_dt, end_dt))

    df = pd.DataFrame(
        data=cache,
        columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
    df['datetime'] = df['timestamp'].apply(
        lambda x: datetime.utcfromtimestamp(x))
    df = df.drop('timestamp', axis=1)
    # Sort like `tick` does: label slicing with bounds that are not present
    # in the index needs a monotonic DatetimeIndex.
    df = df.set_index('datetime').sort_index()
    return df.loc[begin_dt:end_dt]
def tick(symbol, begin, end, level=20):
    """Load order-book depth ticks for one symbol from per-day HDF5 files.

    One file per day is read from
    ``<TICK_FOLDER>/<exchange>/<name>/<date>.h5``; each must contain
    ``timestamps``, ``prices`` and ``volumes`` datasets.  Unlike `candle`, a
    missing day file is an error rather than being skipped.

    :param str symbol: instrument code in ``exchange/name`` form,
        e.g. ``okex/eos.usdt``
    :param str begin: start time (inclusive), parsed with ``dateutil``
    :param str end: end time (inclusive), parsed with ``dateutil``
    :param int level: number of depth levels to keep on each side of the book
    :returns: frame indexed by ``datetime`` (naive UTC, derived from epoch
        seconds) with columns bidpN..bidp1, last, askp1..askpN,
        bidsN..bids1, volume, asks1..asksN, where N is ``level``; ``bidp`` is
        the bid price and ``bids`` the bid size (``askp``/``asks`` are
        presumably the ask-side counterparts)
    :rtype: pandas.DataFrame
    :raises ValueError: if ``level`` is negative or exceeds the depth stored
        in a file, if a day file is missing, or if the date range is empty
    """
    begin_dt = parser.parse(begin)
    end_dt = parser.parse(end)
    if level < 0:
        # A negative level would otherwise surface later as an opaque
        # shape-mismatch error while building the DataFrame.
        raise ValueError(
            'level should not be negative, given value {}'.format(level))

    begin_date = begin_dt.date()
    end_date = end_dt.date()
    # An end time later than 16:00 also pulls in the following day's file.
    # NOTE(review): presumably day files roll over at 16:00 UTC (midnight
    # UTC+8) -- confirm against the process that writes them.
    if end_dt > datetime.combine(end_date, time(16, 0, 0)):
        end_date += timedelta(days=1)

    exchange, sym = symbol.split('/')
    timestamp_cache = []
    price_cache = []
    volume_cache = []
    for date in date_range(begin_date, end_date):
        h5filepath = os.path.join(
            TICK_FOLDER, exchange, sym, str(date) + '.h5')
        if not os.path.isfile(h5filepath):
            raise ValueError('file not existed: ' + h5filepath)
        with h5py.File(h5filepath, 'r') as fs:
            # Price rows are laid out as [bids..., last, asks...], so the
            # stored depth is half of the columns left after removing the
            # middle one.
            max_level = (fs['prices'].shape[1] - 1) // 2
            level_diff = max_level - level
            if level_diff < 0:
                raise ValueError(
                    'level is larger than shape in {} file'.format(h5filepath))
            if level_diff > 0:
                # Trim the same number of outer levels from both sides.
                level_slice = slice(level_diff, -level_diff)
            else:
                level_slice = slice(None, None, None)
            timestamp_cache.append(fs['timestamps'][...])
            price_cache.append(fs['prices'][..., level_slice])
            # NOTE(review): assumes ``volumes`` has the same column layout
            # as ``prices`` -- confirm against the writer.
            volume_cache.append(fs['volumes'][..., level_slice])

    # Nothing was read (e.g. the date range yielded no days); fail before
    # np.concatenate rejects the empty list with an unrelated message.
    if not timestamp_cache:
        raise ValueError('empty data since {} until {}'.format(
            begin_dt, end_dt))

    timestamps = np.reshape(np.concatenate(timestamp_cache), (-1, 1))
    prices = np.concatenate(price_cache)
    volumes = np.concatenate(volume_cache)
    cache = np.concatenate((timestamps, prices, volumes), axis=1)

    # Bid columns run from the deepest level down to 1; ask columns run from
    # 1 up to the deepest level.
    bid_levels = range(level, 0, -1)
    ask_levels = range(1, level + 1)
    columns = ['timestamp']
    columns.extend('bidp' + str(x) for x in bid_levels)
    columns.append('last')
    columns.extend('askp' + str(x) for x in ask_levels)
    columns.extend('bids' + str(x) for x in bid_levels)
    columns.append('volume')
    columns.extend('asks' + str(x) for x in ask_levels)

    df = pd.DataFrame(data=cache, columns=columns)
    df['datetime'] = df['timestamp'].apply(
        lambda x: datetime.utcfromtimestamp(x))
    df = df.drop('timestamp', axis=1)
    df = df.set_index('datetime').sort_index()
    return df.loc[begin_dt:end_dt]
#if __name__ == '__main__':
# # df = candle(
# # 'okex/btc.usdt', '1min', begin='2018-9-1 1:05:00', end='2018-9-3')
# # print(df.head())
# df = tick(
# 'okex/eos.usdt',
# begin='2018-9-9 00:00:00',
# end='2018-9-9 00:02:00',
# level=1)
# print(df.head())