tushare多因子选股

借助 tushare 获取行情数据并计算各个因子,然后对每只股票做 OLS 回归,得到各股票的因子系数(分值),最后按回归结果排序,据此选择要买入的股票。

导包:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from pylab import mpl
mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False
import tushare as ts
# NOTE(review): `token` is assigned from itself, so the name must already be
# defined before this line runs (otherwise this raises NameError). Presumably a
# placeholder for the user's own tushare API token — confirm before running.
token = token
# API client handle; the data-fetching code in this file calls methods on `pro`.
pro = ts.pro_api(token)
import statsmodels.api as sm
import talib as ta

获取数据

def get_HS300(start, end, listed_before=20190601):
    """Return the 399300.SZ index constituents that pass the listing filters.

    Args:
        start: Start date passed to ``pro.index_weight`` (``'YYYYMMDD'``).
        end: End date passed to ``pro.index_weight`` (``'YYYYMMDD'``).
        listed_before: Integer ``YYYYMMDD`` cutoff; only stocks whose
            ``list_date`` is strictly earlier are kept. Defaults to the
            value that was previously hard-coded.

    Returns:
        A list of constituent codes, in order of first appearance and
        without duplicates, restricted to currently listed stocks that were
        listed before ``listed_before`` and whose name does not start with
        ``ST`` / ``*ST``.
    """
    weights = pro.index_weight(index_code='399300.SZ', start_date=start, end_date=end)
    basics = pro.stock_basic(exchange='', list_status='L')

    # Drop recently listed stocks and special-treatment stocks. Both the
    # 'ST' and '*ST' name prefixes are excluded, matching the stated intent
    # of removing ST stocks (previously only '*ST' was filtered).
    seasoned = basics['list_date'].apply(int) < listed_before
    special_treatment = basics['name'].apply(lambda name: name.startswith(('*ST', 'ST')))
    eligible = set(basics.loc[seasoned & ~special_treatment, 'ts_code'])

    # The weight query may return the same constituent more than once when
    # the date range covers several snapshots; dict.fromkeys removes the
    # repeats while keeping first-appearance order.
    return [code for code in dict.fromkeys(weights['con_code']) if code in eligible]
def get_index(start, end):
    """Fetch the daily percentage change of the 399300.SZ index.

    Args:
        start: Start date passed to ``pro.index_daily``.
        end: End date passed to ``pro.index_daily``.

    Returns:
        The ``pct_chg`` column as a Series indexed by trade date (parsed with
        ``pd.to_datetime``) in ascending date order.
    """
    quotes = pro.index_daily(ts_code='399300.SZ', start_date=start, end_date=end)
    trade_dates = pd.to_datetime(quotes['trade_date'])
    quotes.index = trade_dates
    # Sort oldest-first.
    quotes.sort_index(ascending=True, inplace=True)
    return quotes['pct_chg']

计算因子值

def get_change(lists, start, end):
    """Build the per-stock factor table for every code in ``lists``.

    For each code the daily quotes and daily basic indicators are fetched and
    joined, then three factors are derived:

    * ``EMAC12`` - 12-period EMA of the close divided by the close.
    * ``ARBR``   - 26-period AR minus 26-period BR.
    * ``ps_ttm`` - taken as-is from the daily basic data.

    Args:
        lists: Iterable of stock codes.
        start: Start date; converted to ``str`` before being sent to the API
            so that integer dates such as ``20200401`` are accepted too.
        end: End date; converted like ``start``.

    Returns:
        A list with one DataFrame per input code (same order), indexed by
        trade date ascending, with columns
        ``['pct_chg', 'EMAC12', 'ARBR', 'ps_ttm']`` and NaN rows removed.
    """
    start, end = (None if day is None else str(day) for day in (start, end))

    stock_list = []
    for code in lists:
        quotes = pro.daily(ts_code=code, start_date=start, end_date=end)
        basics = pro.daily_basic(ts_code=code, start_date=start, end_date=end)

        # Join on the trade date rather than on row position: the two API
        # results are independent queries, and a positional join silently
        # misaligns rows if they differ in order or length.
        df = pd.merge(quotes, basics[['trade_date', 'ps_ttm']], on='trade_date')
        df.index = pd.to_datetime(df['trade_date'])
        df = df.sort_index(ascending=True)

        df['EMAC12'] = ta.EMA(df.close, timeperiod=12) / df.close

        # Building blocks of AR (open-based) and BR (previous-close-based).
        prev_close = df.close.shift(1)
        df['HO'] = df.high - df.open
        df['OL'] = df.open - df.low
        df['HCY'] = df.high - prev_close
        df['CYL'] = prev_close - df.low
        df['AR'] = ta.SUM(df.HO, timeperiod=26) / ta.SUM(df.OL, timeperiod=26) * 100
        df['BR'] = ta.SUM(df.HCY, timeperiod=26) / ta.SUM(df.CYL, timeperiod=26) * 100
        df['ARBR'] = df['AR'] - df['BR']

        # The rolling indicators leave NaN in their warm-up rows.
        df = df.dropna()
        stock_list.append(df[['pct_chg', 'EMAC12', 'ARBR', 'ps_ttm']])
    return stock_list

因子标准化与去极值(MAD 法):

def MAD(data):
    """Standardize each column, then clip outliers with the MAD rule.

    Every column is first z-scored (column mean and standard deviation),
    and the standardized values are then clipped to
    ``median +/- 3 * 1.4826 * MAD``, where MAD is the median absolute
    deviation of the standardized column.

    Args:
        data: A Series, DataFrame, or anything ``pd.DataFrame`` accepts.

    Returns:
        A DataFrame with the same index and columns as ``pd.DataFrame(data)``
        holding the standardized, clipped values.
    """
    # NOTE(review): the original definition was truncated in the middle of the
    # lower-bound assignment; the lower clip and the return value below are
    # reconstructed by symmetry with the upper bound — confirm against the
    # author's original if available.
    data = pd.DataFrame(data)
    data = (data - data.mean()) / data.std()

    center = data.median()
    mad = (data - center).abs().median()
    half_width = 3 * 1.4826 * mad

    # clip() returns a new frame, avoiding chained-indexing assignment that
    # may write to a temporary copy instead of `data`.
    return data.clip(lower=center - half_width, upper=center + half_width, axis=1)

获取股票因子数据,并对各因子做标准化与去极值处理:

# Stock universe: filtered 399300.SZ constituents.
codes = get_HS300('20200601', '20231203')

# Raw factor tables, one DataFrame per code. Dates are passed as 'YYYYMMDD'
# strings, consistent with the other data-fetching calls in this file
# (they were previously passed as integers here).
x = get_change(codes, '20200401', '20200619')

# Daily risk-free rate derived from a 4% annual rate over 360 days.
# NOTE(review): rf is a fractional rate; if `pct_chg` is quoted in percent
# the two are in different units — confirm against the tushare field
# definition before relying on the excess-return values.
rf = 1.04**(1/360)-1

for frame in x:
    # Excess return first, then the three normalized factors; the column
    # order matters because later code relabels the columns by position.
    frame['change'] = frame['pct_chg'] - rf
    frame['EMAC12_normal'] = MAD(frame['EMAC12'])
    frame['ARBR_normal'] = MAD(frame['ARBR'])
    frame['ps_ttm_normal'] = MAD(frame['ps_ttm'])
    # Keep only the derived columns.
    frame.drop(['pct_chg', 'EMAC12', 'ARBR', 'ps_ttm'], axis=1, inplace=True)

x[0].head()

获取基准指数数据,并与各股票的因子数据按日期合并:

# Benchmark index daily change, converted to an excess return over rf.
HS300_index = get_index('20200513', '20231209')
HS300_index = HS300_index - rf
HS300_index.head()

# Attach the benchmark excess return to every stock's factor table (inner
# join on the date index) and relabel the five resulting columns by position.
factor_labels = ['日涨跌','EMAC12因子','ARBR因子','滚动市销率因子','市场风险溢价因子']
stocks = []
for factors in x:
    merged = pd.merge(factors, HS300_index, left_index=True, right_index=True)
    merged.columns = factor_labels
    stocks.append(merged)

stocks[0].head()

ols计算回归

# One column of fitted OLS parameters per stock code. The regressors are
# passed as a plain array, so the parameter rows are labelled by position,
# with the added intercept labelled 'const'.
results = pd.DataFrame()
for code, stock in zip(codes, stocks):
    try:
        exog = sm.add_constant(
            stock[['EMAC12因子', 'ARBR因子', '滚动市销率因子', '市场风险溢价因子']].values
        )
        result = sm.OLS(stock['日涨跌'], exog).fit()
        results[code] = result.params
    except Exception as exc:
        # Best effort: a stock whose regression cannot be fitted is skipped,
        # but the failure is reported instead of being silently swallowed,
        # and KeyboardInterrupt / SystemExit are no longer caught.
        print('skipping %s: %r' % (code, exc))

results.head()

根据回归的常数项(截距)判断离散程度,按常数项大小对各股票排序,选择其中最大的,构建股票池:

# Reorder the stock columns by their value in the 'const' row, ascending, so
# the stocks with the largest constant term end up in the right-most columns.
z = results.sort_values(by='const', axis=1, ascending=True)
z.head()


本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!

相关文章

立即
投稿

微信公众账号

微信扫一扫加关注

返回
顶部