# Quantitative trading, One Piece series - onepiece_rsh - clean whole-market order book (market depth) data by trading-session time

import os
import re

import pandas
# from tqz_extern.pandas_operator import pandas
from tqz_extern.json_operator import TQZJsonOperator

import warnings
# Silence every warning category process-wide for the rest of the run;
# this also hides any warnings pandas emits while the data is cleaned.
warnings.filterwarnings("ignore")

class MarketDataParser:
    """
    Clean one day's whole-market depth csv so that only ticks inside each
    instrument's trading sessions remain, then write one csv per instrument.

    All state is kept on the class itself (no instances are created):
    `dump_all_format_csv` is the entry point, the two name-mangled
    classmethods are internal helpers.
    """

    # Session table loaded once when the class is defined. It is indexed as
    # session_map[symbol]['night'] / session_map[symbol]['day'], each a list
    # of sessions; only two-element [start, end] sessions are used.
    session_map = TQZJsonOperator.tqz_load_jsonfile(jsonfile='../trading_time/source_trading_time.json')

    # Raw dataframe of the day being processed; set by __check_source_file.
    source_content = None

    # Output directory of the day being processed; set by __check_source_file.
    target_dir: str = ''

    @classmethod
    def dump_all_format_csv(cls, datetime_str: str):
        """
        Dump all instruments' format csv of one day.
        :param datetime_str: day to parse, eg: 20230926.
        :return: None, one '<InstrumentID>.csv' is written into cls.target_dir per instrument.
        """
        cls.__check_source_file(datetime_str=datetime_str)

        assert cls.source_content is not None, 'cls.source_content is None.'

        # A single groupby pass replaces one full-frame boolean scan per
        # instrument; sort=False because the write order is irrelevant.
        for instrument_id, single_instrument_df in cls.source_content.groupby('InstrumentID', sort=False):
            cls.__get_format_market_data(
                instrument_source_df=single_instrument_df
            ).to_csv(f'{cls.target_dir}/{instrument_id}.csv', index=False)

    @classmethod
    def __check_source_file(cls, datetime_str: str):
        """
        Check single day's market depth data csv file, prepare the target
        directory and load the csv into cls.source_content.
        :param datetime_str: datetime of need parse, eg: 20230926.
        :raises AssertionError: when the source csv does not exist.
        """

        year, month = datetime_str[:4], datetime_str[4:6]

        source_path = f'E:/futures_market_data/market_depth_data/{year}/{month}/market_depth_data_{datetime_str}.csv'
        cls.target_dir = f'E:/futures_market_data/target_data/{year}/{month}/{datetime_str}'

        assert os.path.exists(source_path), f'Bad source_path {source_path}.'

        # exist_ok=True already covers the "directory is present" case.
        os.makedirs(cls.target_dir, exist_ok=True)

        cls.source_content = pandas.read_csv(source_path)

    @classmethod
    def __get_format_market_data(cls, instrument_source_df: pandas.DataFrame) -> pandas.DataFrame:
        """
        Clean single instrument dataframe.

        Keeps the rows whose 'UpdateTime' falls inside any [start, end)
        session of the instrument's symbol. A session whose start is later
        than its end is treated as crossing midnight. The input dataframe is
        not modified.
        :param instrument_source_df: source dataframe of single instrument
        :return: single instrument dataframe after clean, sorted by
                 'Timestamp' with a fresh 0..n-1 index (empty when no row is
                 inside a session).
        :raises AssertionError: when the dataframe holds more than one
                 ExchangeInstrument or the symbol has no session entry.
        """

        exchange_instruments = instrument_source_df['ExchangeInstrument'].unique()
        assert len(exchange_instruments) == 1, f'Bad ExchangeInstrument {exchange_instruments}.'

        # The symbol is the ExchangeInstrument with every digit run removed.
        symbol = re.sub(r'\d+', '', exchange_instruments[0])

        assert symbol in cls.session_map, f'Bad symbol: {symbol}.'
        symbol_sessions = cls.session_map[symbol]['night'] + cls.session_map[symbol]['day']

        # NOTE(review): comparisons below assume 'UpdateTime' and the session
        # bounds share one sortable format (presumably 'HH:MM:SS') - confirm.
        update_time = instrument_source_df['UpdateTime']

        # Accumulate one mask over all sessions instead of concatenating
        # per-session slices: no column is written into the caller's frame
        # and a row can never be emitted twice.
        in_session = pandas.Series(False, index=instrument_source_df.index, dtype=bool)
        for single_session in symbol_sessions:
            if len(single_session) != 2:
                continue

            start, end = single_session
            if start < end:
                # Ordinary session inside one calendar day.
                in_session |= (update_time >= start) & (update_time < end)
            elif start > end:
                # Session crossing midnight: late evening OR early morning.
                in_session |= (update_time >= start) | (update_time < end)
            # start == end describes an empty session, nothing to add.

        # mergesort is stable, so ticks sharing a Timestamp keep file order.
        return (
            instrument_source_df[in_session]
            .sort_values(by='Timestamp', ascending=True, kind='mergesort')
            .reset_index(drop=True)
        )


if __name__ == '__main__':
    # Day to clean, written as YYYYMMDD; one csv per instrument is produced.
    parser_datetime = '20230926'
    MarketDataParser.dump_all_format_csv(datetime_str=parser_datetime)

# You may also be interested in: (One, Piece, python, one, piece)