python 同步获取与更新通达信财务数据

利用 Python 同步股票的财务数据,进行基本面辅助分析:

# coding: UTF-8
import hashlib
import zipfile
import time

import pandas as pd
import requests
import threading
import sys
import os
from retry import retry
from queue import Queue


#########################
# 读取通达信专业财务数据
######################

class TDXFinance:
    tdxCwPath = ""
    fileType = ".pkl"
    subjectsPath = "investment/tdxSubjects.csv"
    fileInfoColumns = ['filename', 'md5', 'filesize']


    def __init__(self, cwPath, fileType, subjectsPath="investment/tdxSubjects.csv"):
        self.tdxCwPath = cwPath
        self.fileType = fileType
        self.subjectsPath = subjectsPath

    def read_cat(self):
        """Load ``tdxSubjectCat.csv`` from the current working directory.

        Returns:
            A DataFrame whose column names come from the file's first row.
        """
        return pd.read_csv('tdxSubjectCat.csv', header=0)

    def read_subjects(self, cat=0):
        subjects = pd.read_csv(self.subjectsPath, sep='--', header=0, encoding="UTF-8", engine='python')
        if cat > 0:  # 只返回指定的分类科目
            return subjects.loc[subjects['cat'] == cat]
        return subjects

    def get_all_infos(self, date, code):
        """Return the rows for one stock code from a report date's finance table.

        Note the argument order: ``date`` comes first, then ``code``.

        Args:
            date: Report date string, e.g. ``"20220930"``; usually the last
                day of a quarter.
            code: Stock code to match, e.g. ``"600019"``.

        Returns:
            The rows of ``get_all_finance(date)`` whose column ``0`` equals
            ``code`` (column 0 presumably holds the stock code).
        """
        finance = self.get_all_finance(date)
        matches_code = finance[0] == code
        return finance.loc[matches_code]

    def get_info_by_subject(self, code, date, *subject):
        """Placeholder for a per-subject lookup; not implemented yet.

        Args:
            code: Stock code, e.g. ``"600019"``.
            date: Report date string, e.g. ``"20220930"``; usually the last
                day of a quarter.
            *subject: Subject numbers (1~580); see ``read_subjects``.

        Returns:
            Always ``None`` for now.
        """
        return None

    # Get the financial data of all companies for the given date.
    # @param date  report date string: YYYY0331, YYYY0930, YYYY1231
    #
    # NOTE(review): this definition is corrupted in the visible text. The first
    # four statements build the .pkl and .dat paths for `date` and read both
    # file sizes; the `if` line that follows is not valid Python, and everything
    # after it uses names that are not defined in this class as shown
    # (file_size, name, self.total, self.num, self.get_range, self.download).
    # It looks like the rest of this method and the start of a separate
    # multi-threaded downloader were lost — restore from the original source.
    def get_all_finance(self, date):
        # Cached file: <tdxCwPath>/gpcw<date><fileType>
        pkl_path=self.tdxCwPath + os.sep + 'gpcw' + date + self.fileType
        # Raw file: <tdxCwPath>/gpcw<date>.dat
        dat_path=self.tdxCwPath+os.sep+'gpcw'+date+".dat"
        # os.stat raises if either file does not exist.
        pkl_size=os.stat(pkl_path).st_size
        dat_size=os.stat(dat_path).st_size
        # NOTE(review): truncated/garbled line — text is missing between
        # `pkl_size` and `= file_size`.
        if pkl_size= file_size:
            return file_size

        self.fd = open(name, "wb")  # when resuming, opening with rb+ directly fails if the file does not exist, so create it with wb first and then use rb+
        self.fd.truncate(self.total)  # create a file as large as the download; not required, used when stream=True
        self.fd.close()
        # self.fd = open(self.name, "rb+")           # when resuming, ab mode forces the pointer to the end of the file and seek has no effect; use rb+ instead
        thread_list = []
        ts_queue = Queue()  # rely on Queue's thread safety; each (start, end) range is put on the queue
        for ran in self.get_range():
            start_, end_ = ran
            ts_queue.put((start_, end_))

        # One worker thread per self.num, all sharing the same range queue.
        for i in range(self.num):
            t = threading.Thread(target=self.download, name='th-' + str(i), kwargs={'ts_queue': ts_queue})
            # NOTE(review): Thread.setDaemon() is deprecated since Python 3.10;
            # `t.daemon = True` is the replacement.
            t.setDaemon(True)
            thread_list.append(t)
        for t in thread_list:
            t.start()
        for t in thread_list:
            t.join()  # wait here; continue only after every thread has finished

        # self.fd was already closed above; closing it again is a no-op.
        self.fd.close()

def sync():
    """Create a TDXFinance for this platform's data directory and call update().

    The directory is picked from ``sys.platform``: Windows/Cygwin, macOS, or
    the Linux-style default. ``update()`` is not defined in the visible part of
    the class — NOTE(review): confirm it exists in the full source.
    """
    platform = sys.platform
    if platform in ('win32', 'cygwin'):
        data_dir = "d:\\gjdata\\cw"
    elif platform == 'darwin':
        data_dir = '/Users/luoshunkui/userdata/tdx/cw'
    else:
        data_dir = "/www/py/cw"
    TDXFinance(data_dir, ".pkl", "tdxSubjects.csv").update()

def test():
    """Command-line query helper.

    Prints ``sys.argv`` and the category table, then reads two optional
    arguments from the command line:

    * ``code=<c1>,<c2>,...`` — for each code, print ``get_all_infos(date, code)``.
    * ``date=<YYYYMMDD>`` — when non-empty, also print ``get_all_finance(date)``.

    If an argument is repeated, the last occurrence wins.
    """
    # Pick the finance data directory for the current platform.
    if sys.platform in ('win32', 'cygwin'):
        data_dir = "d:\\gjdata\\cw"
    elif sys.platform == 'darwin':
        data_dir = '/Users/userdata/tdx/cw'
    else:
        data_dir = "/www/py/cw"
    finance = TDXFinance(data_dir, ".pkl", "tdxSubjects.csv")

    print(sys.argv)
    print(finance.read_cat())

    code_list = []
    report_date = ''
    for argument in sys.argv:
        # 'code=' and 'date=' are both five characters long, hence the [5:] slice.
        if argument.startswith('code='):
            code_list = argument[5:].split(',')
        if argument.startswith('date='):
            report_date = argument[5:]

    for stock_code in code_list:
        print(finance.get_all_infos(report_date, stock_code))
    if report_date:
        print(finance.get_all_finance(report_date))

if __name__ == '__main__':
    # Run the query helper when 'test' appears among the command-line
    # arguments; otherwise run the sync.
    entry_point = test if 'test' in sys.argv else sync
    entry_point()

你可能感兴趣的:(python,开发语言)