MachineLearning.1.如何使用ML进行股票投资.Day3

参考内容:

续上一篇

8. Labeling of data part 1

本部分将stock_price和sp500的波动情况用百分比表示(即相对于每只股票第一条有效记录的涨跌幅),代码中标有#new的是本次新加的内容。

import pandas as pd
import os
import time
from datetime import datetime

# Root folder of the intraQuarter data set; Key_Stats reads the "_KeyStats"
# sub-folder below it. Machine-specific absolute path -- the original note
# suggests obtaining it by cd-ing into the folder and running pwd.
path = "/home/sum/share/Ubuntu_DeepLearning/intraQuarter"

# Markup that brackets the values on the saved key-statistics pages.
# NOTE(review): in the reviewed version of this file all four separators were
# empty strings, which makes str.split raise "ValueError: empty separator" and
# caused every page to be skipped silently. They are restored here from the
# page layout this tutorial series parses -- confirm against one of the
# downloaded .html files before relying on the output.
_STAT_CELL_OPEN = '<td class="yfnc_tabledata1">'
_STAT_CELL_CLOSE = '</td>'
_PRICE_OPEN = '</small><big><b>'
_PRICE_CLOSE = '</b></big>'

# How far back to look for an index value when the snapshot day has none.
_SP500_FALLBACK_SECONDS = 259200  # 3 days


def _sp500_close(sp500_df, unix_time):
    """Return the "Adjusted Close" of the index for the day of *unix_time*.

    Some snapshots were taken on days that have no row in the index data
    (e.g. weekends), so when the exact date is absent the date three days
    earlier is tried instead.

    Raises:
        ValueError: neither date has a row in *sp500_df*.
    """
    for offset in (0, _SP500_FALLBACK_SECONDS):
        day = datetime.fromtimestamp(unix_time - offset).strftime('%Y-%m-%d')
        matches = sp500_df[sp500_df.index == day]
        if len(matches) > 0:
            return float(matches["Adjusted Close"].iloc[0])
    raise ValueError("no S&P 500 row near " + day)


def Key_Stats(gather="Total Debt/Equity (mrq)"):
    """Extract one statistic per saved page and write the result as a CSV.

    Walks every ticker folder under ``path + '/_KeyStats'``, parses each
    ``YYYYmmddHHMMSS.html`` snapshot, and records the requested statistic,
    the stock price, the S&P 500 adjusted close for that day, and the
    percentage change of both prices relative to the first usable snapshot
    of the same ticker. Snapshots that cannot be parsed are skipped.

    Args:
        gather: Label of the statistic to extract, as it appears on the page
            (without the trailing colon).

    Side effects:
        Reads "YAHOO-INDEX_GSPC.csv" from the working directory, prints the
        output file name, and writes that file (the label with spaces,
        parentheses and slashes removed, plus ".8.csv").

    Raises:
        ValueError: a file name in a ticker folder does not match
            ``%Y%m%d%H%M%S.html``.
        KeyError: the index CSV has no "Adjusted Close" column.
    """
    statspath = path+'/_KeyStats'
    # os.walk order is not guaranteed; after sorting, the root folder itself
    # comes first and is dropped with [1:] below.
    stock_list = sorted(x[0] for x in os.walk(statspath))
    columns = ['Date',
               'Unix',
               'Ticker',
               'DE Ratio',
               'Price',
               'stock_p_change',    #new
               'SP500',
               'sp500_p_change']    #new

    # DataFrame.from_csv no longer exists in current pandas; this is its
    # documented equivalent.
    sp500_df = pd.read_csv("YAHOO-INDEX_GSPC.csv", index_col=0, parse_dates=True)

    # Rows are collected in a list and turned into a frame once at the end;
    # DataFrame.append was removed from pandas and copied the frame per row.
    rows = []

    for each_dir in stock_list[1:]:
        ticker = each_dir.split("/")[-1]
        # Baselines for the percentage change. A falsy value means "not set
        # yet"; the truthiness test also replaces a 0.0 baseline, which
        # would otherwise make every later division fail.
        starting_stock_value = False    #new
        starting_sp500_value = False    #new

        # File names are timestamps, so sorting them makes the first usable
        # snapshot the earliest one. os.listdir alone returns arbitrary order.
        for file_name in sorted(os.listdir(each_dir)):
            date_stamp = datetime.strptime(file_name, '%Y%m%d%H%M%S.html')
            unix_time = time.mktime(date_stamp.timetuple())
            full_file_path = each_dir+'/'+file_name
            with open(full_file_path, 'r') as page:
                source = page.read()

            try:
                # The label may be followed by a closing tag or a newline
                # before the value cell, so split on the label first and on
                # the cell markup second.
                parts = source.split(gather+':')
                if len(parts) > 1:
                    value = parts[1].split(_STAT_CELL_OPEN)[1].split(_STAT_CELL_CLOSE)[0]
                else:
                    value = 'NoValue'

                sp500_value = _sp500_close(sp500_df, unix_time)
                stock_price = float(source.split(_PRICE_OPEN)[1].split(_PRICE_CLOSE)[0])

                if not starting_stock_value:            #new
                    starting_stock_value = stock_price  #new
                if not starting_sp500_value:            #new
                    starting_sp500_value = sp500_value  #new
                stock_p_change = ((stock_price-starting_stock_value)/starting_stock_value) * 100    #new
                sp500_p_change = ((sp500_value-starting_sp500_value)/starting_sp500_value) * 100    #new

                # Converted last, as before: a page without the statistic
                # still sets the baselines above but contributes no row.
                de_ratio = float(value)
            except (IndexError, TypeError, ValueError, ZeroDivisionError):
                # Missing markup, non-numeric text, no index value, or a
                # zero baseline: skip this snapshot, keep going.
                continue

            rows.append({'Date':date_stamp,
                         'Unix':unix_time,
                         'Ticker':ticker,
                         'DE Ratio':de_ratio,
                         'Price':stock_price,
                         'stock_p_change':stock_p_change,   #new
                         'SP500':sp500_value,
                         'sp500_p_change':sp500_p_change})  #new

    df = pd.DataFrame(rows, columns=columns)

    save = gather.replace(' ','').replace('(','').replace(')','').replace('/','')+('.8.csv')
    print(save)
    df.to_csv(save)

# Run the extraction for the default statistic, "Total Debt/Equity (mrq)".
Key_Stats()

9. Labeling data part 2

你可能感兴趣的:(MachineLearning.1.如何使用ML进行股票投资.Day3)