#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
from datetime import datetime, time, timedelta
import h5py
import numpy as np
import pandas as pd
from dateutil import parser
from pymongo import MongoClient
import config
from util import date_range
# Candle periods accepted by candle(). Only '1min' is enabled, which is the
# single period candle()'s docstring lists as supported. The other periods
# are kept below, commented out, as candidates that are not enabled.
PERIODS = {
    '1min',
    # '3min',
    # '5min',
    # '15min',
    # '30min',
    # '1day',
    # '3day',
    # '1week',
    # '1hour',
    # '2hour',
    # '4hour',
    # '6hour',
    # '12hour',
}
# Storage roots under config.data_dir: the 'bar' folder is read by candle()
# and the 'tick' folder is read by tick().
MIN_CANDLE_FOLDER, TICK_FOLDER = (
    os.path.join(config.data_dir, leaf) for leaf in ('bar', 'tick')
)
def hist_symbol(date):
    """Look up the symbol list stored for one day.

    Queries the ``hist_symbols`` collection of the ``master`` database for
    the document whose ``date`` equals the parsed input.

    :param str date: the day to look up (parsed with ``dateutil.parser``)
    :returns: the symbol list for that day, or None when there is none
    :rtype: list or None
    """
    with MongoClient(config.mongo_read_uri) as client:
        collection = client.master.hist_symbols
        query = {"date": parser.parse(date)}
        # Fetch only the ``symbols`` field; ``_id`` is explicitly excluded.
        record = collection.find_one(
            query, projection={"symbols": 1, "_id": 0})
        # No matching document, an empty one, or one without the field.
        if not record or 'symbols' not in record:
            return None
        return record['symbols']
def candle(symbol, period, begin, end):
    """Load candle (bar) data for one symbol from its per-symbol HDF5 file.

    :param symbol: symbol code, written as ``exchange/name`` (split on ``/``)
    :param str period: candle period; supported: 1min
    :param str begin: start time (inclusive)
    :param str end: end time (inclusive)
    :returns: index: datetime64; columns: open, high, low, close, volume
    :rtype: pandas DataFrame or None
    :raises KeyError: if ``period`` is not a member of ``PERIODS``
    :raises ValueError: if the symbol's HDF5 file does not exist, or the
        assembled data is empty
    """
    begin_dt = parser.parse(begin)
    end_dt = parser.parse(end)
    begin_date = begin_dt.date()
    end_date = end_dt.date()
    # NOTE(review): the format string and its values are passed to KeyError
    # as separate arguments, so the %s placeholders are never interpolated.
    if period not in PERIODS:
        raise KeyError(
            'argument wrong: period should be in [%s], given value %s',
            ','.join(list(PERIODS)), period)
    # The file lives at <MIN_CANDLE_FOLDER>/<exchange>/<name>.h5.
    exchange, sym = symbol.split('/')
    h5filepath = os.path.join(MIN_CANDLE_FOLDER, exchange, sym + '.h5')
    if not os.path.isfile(h5filepath):
        raise ValueError('file not existed: ' + h5filepath)
    # Per-date pieces are collected here and concatenated afterwards.
    timestamp_cache = []
    price_cache = []
    volume_cache = []
    with h5py.File(h5filepath, 'r') as min_fs:
        # The file is looked up by date string at its top level.
        # presumably date_range yields each date from begin_date to end_date;
        # verify against util.date_range.
        for dt in date_range(begin_date, end_date):
            date_str = str(dt)
            if date_str not in min_fs:
                # NOTE(review): the body of this branch, and the statements
                # that append to the three caches, are not present in this
                # view of the file - restore them from the original source.
    # Timestamps and volumes are reshaped into single columns so they can be
    # joined with the price array.
    timestamp_cache = np.reshape(np.concatenate(timestamp_cache), (-1, 1))
    price_cache = np.concatenate(price_cache)
    volume_cache = np.reshape(np.concatenate(volume_cache), (-1, 1))
    cache = np.concatenate((timestamp_cache, price_cache, volume_cache),
    # NOTE(review): the call above is unterminated in this view; the remaining
    # argument(s) and the closing parenthesis are missing.
    if len(cache) == 0:
        raise ValueError('empty data since {} until {}'.format(
            begin_dt, end_dt))
    # NOTE(review): no data argument is visible here; presumably the
    # concatenated ``cache`` is meant to be passed - TODO confirm.
    df = pd.DataFrame(
        columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
    # Replace the numeric timestamp column with a datetime index built via
    # datetime.utcfromtimestamp.
    df['datetime'] = df['timestamp'].apply(
        lambda x: datetime.utcfromtimestamp(x))
    df = df.drop('timestamp', axis=1)
    df = df.set_index('datetime')
    # Trim to the requested window (documented above as inclusive).
    return df[begin_dt:end_dt]
def tick(symbol, begin, end, level=20):
    """Load order-book tick data for one symbol from per-date HDF5 files.

    :param symbol: symbol code, written as ``exchange/name`` (split on ``/``)
    :param str begin: start time (inclusive)
    :param str end: end time (inclusive)
    :param int level: number of order-book levels to return
    :returns: index: datetime64; columns: bidpN~1, last, askp1~N, bidsN~1,
        volume, asks1~N. Note: bidp is the bid price, bids is the bid size
    :rtype: pandas DataFrame or None
    :raises ValueError: if a date's file does not exist, if ``level`` exceeds
        the number of levels stored in a file, or if no data was collected
    """
    begin_dt = parser.parse(begin)
    end_dt = parser.parse(end)
    begin_date = begin_dt.date()
    end_date = end_dt.date()
    # When the end time is later than 16:00 on its own date, the following
    # date's file is read as well.
    # NOTE(review): presumably the daily files roll over at 16:00 - confirm.
    split_end_dt = datetime.combine(end_date, time(16, 0, 0))
    if end_dt > split_end_dt:
        end_date += timedelta(days=1)
    exchange, sym = symbol.split('/')
    # Per-date pieces are collected here and concatenated afterwards.
    timestamp_cache = []
    price_cache = []
    volume_cache = []
    # presumably date_range yields each date from begin_date to end_date;
    # verify against util.date_range.
    for date in date_range(begin_date, end_date):
        date_str = str(date)
        # One file per date: <TICK_FOLDER>/<exchange>/<name>/<date>.h5.
        h5filepath = os.path.join(TICK_FOLDER, exchange, sym, date_str + '.h5')
        if not os.path.isfile(h5filepath):
            raise ValueError('file not existed: ' + h5filepath)
        with h5py.File(h5filepath, 'r') as fs:
            # 'prices' is treated as having 2 * max_level + 1 columns
            # (presumably bids, last, asks, per the docstring - TODO confirm).
            max_level = int((fs['prices'].shape[1] - 1) / 2)
            level_diff = max_level - level
            if level_diff < 0:
                raise ValueError(
                    'level is larger than shape in {} file'.format(h5filepath))
            elif level_diff > 0:
                # Drop the same number of columns from both ends.
                level_slice = slice(level_diff, -level_diff)
            # NOTE(review): an ``else:`` appears to be missing before the next
            # line; as shown, it overrides the slice chosen above.
            level_slice = slice(None, None, None)
            price_cache.append(fs['prices'][..., level_slice])
            volume_cache.append(fs['volumes'][..., level_slice])
    # NOTE(review): no visible statement appends to timestamp_cache, so as
    # shown this check always raises - the append is missing from this view.
    if len(timestamp_cache) == 0:
        raise ValueError('empty data since {} until {}'.format(
            begin_dt, end_dt))
    timestamp_cache = np.reshape(np.concatenate(timestamp_cache), (-1, 1))
    price_cache = np.concatenate(price_cache)
    volume_cache = np.concatenate(volume_cache)
    cache = np.concatenate((timestamp_cache, price_cache, volume_cache),
    # NOTE(review): the call above is unterminated in this view; the remaining
    # argument(s) and the closing parenthesis are missing.
    # Column labels: bid prices N..1, ask prices 1..N, bid sizes N..1,
    # ask sizes 1..N.
    # NOTE(review): the docstring also lists ``last`` and ``volume`` columns,
    # but no visible statement adds those names - confirm against the
    # original source.
    columns = ['timestamp']
    columns.extend(['bidp' + str(x) for x in range(level, 0, -1)])
    columns.extend(['askp' + str(x) for x in range(1, level + 1)])
    columns.extend(['bids' + str(x) for x in range(level, 0, -1)])
    columns.extend(['asks' + str(x) for x in range(1, level + 1)])
    df = pd.DataFrame(data=cache, columns=columns)
    # Replace the numeric timestamp column with a sorted datetime index built
    # via datetime.utcfromtimestamp.
    df['datetime'] = df['timestamp'].apply(
        lambda x: datetime.utcfromtimestamp(x))
    df = df.drop('timestamp', axis=1)
    df = df.set_index('datetime').sort_index()
    # Trim to the requested window (documented above as inclusive).
    return df[begin_dt:end_dt]
#if __name__ == '__main__':
# # df = candle(
# # 'okex/btc.usdt', '1min', begin='2018-9-1 1:05:00', end='2018-9-3')
# # print(df.head())
# df = tick(
# 'okex/eos.usdt',
# begin='2018-9-9 00:00:00',
# end='2018-9-9 00:02:00',
# level=1)
# print(df.head())