seasonal_stock_analysis.py — 427 lines (395 loc) · 14.1 KB
(forked from ANGELTALAVERA/Finance)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
# Import dependencies
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import yfinance as yf
import requests
import pickle
import bs4 as bs
import requests
# You need to change this to a convenient spot on your own hard drive.
my_path = ""
# Minimum fraction of lookback years a calendar date must be "up" for it to
# count as a seasonal winner in calc_trading_stats().
threshold = 0.80

# Scrape the current list of S&P 500 components from Wikipedia.
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
html = requests.get(url).text
sp_df = pd.read_html(html, header=0)[0]
sp500_tickers = sp_df['Symbol'].tolist()

# One-column frame of tickers; also iterated again later when reading the CSVs back.
df_sp500_tickers = pd.DataFrame(sp500_tickers, columns=["Symbol"])

# Make sure the output directory exists before writing per-ticker CSVs.
import os
os.makedirs(my_path + "/data", exist_ok=True)

# Loops through the S&P 500 tickers, downloads the data from Yahoo and creates
# a separate CSV file of historical data for each ticker (e.g. AAPL.csv).
for index, ticker in df_sp500_tickers.iterrows():
    my_ticker = ticker['Symbol']
    yf_ticker = yf.Ticker(my_ticker)
    data = yf_ticker.history(period="max")
    df = pd.DataFrame(data)
    df.reset_index(level=0, inplace=True)
    df['Symbol'] = my_ticker
    df = df[['Symbol', 'Date', 'Close']]
    # Yahoo has a tendency to duplicate the last row.
    df.drop_duplicates(subset="Date", keep='first', inplace=True)
    df.to_csv(path_or_buf=my_path + "/data/" + my_ticker + ".csv", index=False)
# Container for the recommended-trade statistics; one row is appended per
# qualifying ticker/holding-period combination by filter_and_append_stats().
_tradelist_columns = [
    "my_ticker",
    "hold_per",
    "pct_uprows",
    "max_up_return",
    "min_up_return",
    "avg_up_return",
    "avg_down_return",
    "exp_return",
    "stdev_returns",
    "pct_downside",
    "worst_return",
    "least_pain_pt",
    "total_years",
    "max_consec_beat",
    "best_buy_date",
    "best_sell_date",
    "analyzed_years",
]
df_tradelist = pd.DataFrame(columns=_tradelist_columns)
# Convert prices to holding period returns based on 20 trading days per month.
def convert_prices_to_periods():
    """Turn the module-level price frame ``df`` into rolling returns ``dfr``.

    Reads the globals ``df`` (Close prices indexed by Date) and ``dperiods``
    (holding period in trading days); publishes the rounded returns table as
    the global ``dfr`` with the Date index restored as a column.
    """
    global dperiods
    global dfr
    returns = df.pct_change(periods=dperiods).round(4)
    returns = returns.reset_index(level=0)
    dfr = returns.rename(columns={"Close": "Returns"})
# Separate out the date column into separate month, year and day values.
def separate_date_column():
    """Split ``dfr``'s Date column into Month/Day/Year plus an "M-D" key.

    The "M-D" string (e.g. "3-5") identifies a calendar date across years and
    becomes the pivot index later in the pipeline.
    """
    global dfr
    dates = pd.DatetimeIndex(dfr["Date"])
    dfr["Month"] = dates.month
    dfr["Day"] = dates.day
    dfr["Year"] = dates.year
    dfr["M-D"] = dfr["Month"].astype(str) + "-" + dfr["Day"].astype(str)
    # Let the full table display without truncation in interactive sessions.
    pd.set_option("display.max_rows", len(dfr))
# Pivot the table to show years across the top and Month-Day values in the first column on the left.
def pivot_the_table():
    """Pivot ``dfr`` into a table of returns indexed by "M-D", one column per year.

    Forward-fills gaps so calendar dates missing in a given year inherit the
    previous date's return.  Publishes the result as the global ``dfr_pivot``.
    """
    global dfr_pivot
    dfr_pivot = dfr.pivot(index="M-D", columns="Year", values="Returns")
    dfr_pivot.reset_index(level=0, inplace=True)
    dfr_pivot.columns.name = "Index"
    # BUG FIX: fillna(method="ffill") is deprecated (removed in pandas 3.0);
    # DataFrame.ffill() is the supported spelling with identical semantics.
    # (The redundant pd.DataFrame() re-wrap was also dropped: pivot() already
    # returns a DataFrame.)
    dfr_pivot.ffill(inplace=True)
# Add additional calculated columns to facilitate statistic calculations for each stock.
def add_calculated_items():
    """Append per-date summary columns (counts, win rates, return stats) to dfr_pivot.

    Expects the module-level ``dfr_pivot`` produced by pivot_the_table():
    an "M-D" column followed by one column of returns per year.  Publishes
    ``lookback`` and ``start`` as globals for later use.
    """
    global dfr_pivot
    global lookback
    global start
    # The lookback figure is the number (must be an integer) of years back from last year that you want to include
    lookback = 20
    start = 1
    # Column 0 is "M-D"; if fewer than `lookback` year columns exist use them
    # all, otherwise start at the column `lookback` years from the end.
    if lookback > len(dfr_pivot.columns) - 1:
        start = 1
    else:
        start = len(dfr_pivot.columns) - lookback
    # Number of numeric (year) cells with data for each calendar date.
    dfr_pivot["YearCount"] = dfr_pivot.count(axis=1, numeric_only=True)
    dfr_pivot["Lookback"] = lookback
    # Years with a positive return.  Each slice end below subtracts exactly the
    # number of summary columns appended so far, so the slice covers only the
    # year columns (2 appended at this point: YearCount, Lookback).
    dfr_pivot["UpCount"] = dfr_pivot[
        dfr_pivot.iloc[:, start : len(dfr_pivot.columns) - 2] > 0
    ].count(axis=1)
    # Years with a negative return.  NOTE(review): this slice end carries no
    # offset, so it also spans YearCount/Lookback/UpCount -- harmless, since
    # those are non-negative counts and can never satisfy `< 0`.
    dfr_pivot["DownCount"] = dfr_pivot[
        dfr_pivot.iloc[:, start : len(dfr_pivot.columns)] < 0
    ].count(axis=1)
    # Win/loss rates.  NOTE(review): divides by `lookback` even when fewer
    # years of history exist, which understates PctUp for young tickers.
    dfr_pivot["PctUp"] = dfr_pivot["UpCount"] / dfr_pivot["Lookback"]
    dfr_pivot["PctDown"] = dfr_pivot["DownCount"] / dfr_pivot["Lookback"]
    # Mean return across the year columns (6 summary columns appended so far).
    dfr_pivot["AvgReturn"] = dfr_pivot.iloc[:, start : len(dfr_pivot.columns) - 6].mean(
        axis=1
    )
    # Standard deviation of the year returns (7 summary columns appended).
    dfr_pivot["StDevReturns"] = dfr_pivot.iloc[
        :, start : len(dfr_pivot.columns) - 7
    ].std(axis=1)
    # One-standard-deviation downside estimate (~67% confidence bound).
    dfr_pivot["67PctDownside"] = dfr_pivot["AvgReturn"] - dfr_pivot["StDevReturns"]
    # Best and worst single-year return (9 and 10 summary columns appended).
    dfr_pivot["MaxReturn"] = dfr_pivot.iloc[:, start : len(dfr_pivot.columns) - 9].max(
        axis=1
    )
    dfr_pivot["MinReturn"] = dfr_pivot.iloc[:, start : len(dfr_pivot.columns) - 10].min(
        axis=1
    )
# Add a fictional date column in Python date/time format so the table can be sorted by date. Then sort by Date.
# Reset the index and round the float values to 4 decimals.
def sortbydate_resetindex_export():
    """Sort ``dfr_pivot`` chronologically via a synthetic year-2000 date.

    Year 2000 is a leap year, so a "2-29" row still parses.  Resets the index
    and rounds all float values to 4 decimals.
    """
    global dfr_pivot
    dfr_pivot["Date"] = "2000-" + dfr_pivot["M-D"].astype(str)
    # BUG FIX: infer_datetime_format is deprecated since pandas 2.0 (it became
    # a warning no-op); give the parser the explicit format instead, which is
    # also deterministic and faster.
    dfr_pivot["Date"] = pd.to_datetime(dfr_pivot["Date"], format="%Y-%m-%d")
    dfr_pivot.sort_values(by="Date", ascending=True, inplace=True)
    dfr_pivot.reset_index(inplace=True)
    dfr_pivot = dfr_pivot.round(4)
# Calculate the trading statistics for the rolling holding periods for the stock.
def calc_trading_stats():
    """Compute seasonal trading statistics from the pivoted returns table.

    Reads the module-level ``dfr_pivot`` (plus ``threshold``, ``interval``
    and ``lookback``) and publishes every statistic through module-level
    globals so filter_and_append_stats() can pick them up.
    """
    global interval
    global dfr_pivot
    global pct_uprows
    global max_up_return
    global min_up_return
    global avg_up_return
    global avg_down_return
    global exp_return
    global stdev_returns
    global pct_downside
    global worst_return
    global least_pain_pt
    global total_years
    global n_consec
    global max_n_consec
    global max_consec_beat
    global best_sell_date
    global best_buy_date
    global analyzed_years
    global lookback
    # Fraction of calendar dates whose win rate beats the threshold.
    pct_uprows = (
        (
            dfr_pivot.loc[dfr_pivot["PctUp"] > threshold, "PctUp"].count()
            / dfr_pivot.loc[:, "PctUp"].count()
        )
        .astype(float)
        .round(4)
    )
    # Return extremes over the dates that beat the threshold.
    max_up_return = dfr_pivot.loc[dfr_pivot["PctUp"] > threshold, "MaxReturn"].max()
    min_up_return = dfr_pivot.loc[dfr_pivot["PctUp"] > threshold, "MinReturn"].min()
    # Average returns over majority-up and majority-down dates.
    avg_up_return = dfr_pivot.loc[dfr_pivot["PctUp"] > 0.5, "AvgReturn"].mean()
    avg_up_return = np.float64(avg_up_return).round(4)
    avg_down_return = dfr_pivot.loc[dfr_pivot["PctDown"] > 0.5, "AvgReturn"].mean()
    avg_down_return = np.float64(avg_down_return).round(4)
    exp_return = round(dfr_pivot["AvgReturn"].mean(), 4)
    stdev_returns = dfr_pivot["StDevReturns"].mean()
    stdev_returns = np.float64(stdev_returns).round(4)
    worst_return = dfr_pivot["MinReturn"].min()
    # One-sigma downside on the expected return.
    pct_downside = exp_return - stdev_returns
    pct_downside = np.float64(pct_downside).round(4)
    least_pain_pt = dfr_pivot.loc[dfr_pivot["PctUp"] > threshold, "67PctDownside"].max()
    total_years = dfr_pivot["YearCount"].max()
    analyzed_years = lookback
    # Longest run of consecutive dates whose win rate beats the threshold.
    n_consec = 0
    max_n_consec = 0
    for x in dfr_pivot["PctUp"]:
        if x > threshold:
            n_consec += 1
        else:
            # BUG FIX: reset to 0, not 1 -- a losing date must not be counted
            # as the first member of the next streak.
            max_n_consec = max(n_consec, max_n_consec)
            n_consec = 0
    # BUG FIX: fold in a streak that runs to the end of the table; previously
    # a trailing streak was silently dropped.
    max_n_consec = max(n_consec, max_n_consec)
    max_consec_beat = max_n_consec
    # Best sell date = date with the highest 67% downside bound among winners.
    try:
        best_sell_date = dfr_pivot.loc[
            dfr_pivot["67PctDownside"] == least_pain_pt, "M-D"
        ].iloc[0]
    except IndexError:  # no row qualifies (e.g. least_pain_pt is NaN)
        best_sell_date = "nan"
    # Best buy date = `interval` rows before the sell date.  NOTE(review):
    # a negative row index wraps around to the end of the table, mirroring
    # the original behavior.
    try:
        row = (
            dfr_pivot.loc[dfr_pivot["M-D"] == best_sell_date, "M-D"].index[0] - interval
        )
        col = dfr_pivot.columns.get_loc("M-D")
        best_buy_date = dfr_pivot.iloc[row, col]
    except IndexError:  # sell date missing, or offset out of range
        best_buy_date = "nan"
# If the pct_uprows and history conditions are met, then create the array of stat values and append it to the recommended trade list.
def filter_and_append_stats():
    """Snapshot the current ticker's stats and, if it qualifies, add it to the trade list.

    Builds ``statsdata``/``df_statsdata`` from the module-level stat globals;
    a row is appended to ``df_tradelist`` only when pct_uprows > 0.1 and
    total_years > 9 (i.e. at least 10 years of history).
    """
    global statsdata
    global df_statsdata
    global df_tradelist
    # Save the stats data separately to export to Excel for further research on each ticker if desired.
    # NOTE: np.array coerces every value to str, so downstream filters
    # (e.g. .str.contains("nan")) operate on strings.
    statsdata = np.array(
        [
            my_ticker,
            hold_per,
            pct_uprows,
            max_up_return,
            min_up_return,
            avg_up_return,
            avg_down_return,
            exp_return,
            stdev_returns,
            pct_downside,
            worst_return,
            least_pain_pt,
            total_years,
            max_consec_beat,
            best_buy_date,
            best_sell_date,
            analyzed_years,
        ]
    )
    df_statsdata = pd.DataFrame(
        statsdata.reshape(-1, len(statsdata)),
        columns=[
            "my_ticker",
            "hold_per",
            "pct_uprows",
            "max_up_return",
            "min_up_return",
            "avg_up_return",
            "avg_down_return",
            "exp_return",
            "stdev_returns",
            "pct_downside",
            "worst_return",
            "least_pain_pt",
            "total_years",
            "max_consec_beat",
            "best_buy_date",
            "best_sell_date",
            "analyzed_years",
        ],
    )
    # Only recommend tickers with a meaningful share of winning dates and
    # at least a decade of history.
    if pct_uprows > 0.1 and total_years > 9:
        # BUG FIX: DataFrame.append() was removed in pandas 2.0; build a
        # one-row frame and concatenate instead (same resulting row).
        new_row = pd.DataFrame([dict(zip(df_tradelist.columns, statsdata))])
        df_tradelist = pd.concat([df_tradelist, new_row], ignore_index=True)
# This module grabs each ticker file, transforms it and calculates the statistics needed for a 90 day holding period.
def calc_3month_returns():
    """Run the full transform/stats pipeline for a 3-month (60 trading day) hold."""
    global dfr, dfr_pivot, df_tradelist, dfr_3mo, df_statsdata_3mo
    global threshold, hold_per, dperiods, interval
    # Pipeline parameters for this holding period.
    hold_per = "3 Mos"
    dperiods = 60   # trading days per holding period
    interval = 90   # calendar-day offset used to back out the buy date
    # Transform the price history into a pivoted seasonal-returns table.
    convert_prices_to_periods()
    separate_date_column()
    pivot_the_table()
    add_calculated_items()
    sortbydate_resetindex_export()
    dfr_3mo = pd.DataFrame(dfr_pivot)
    # Extract the stats and stash qualifying tickers on the trade list.
    calc_trading_stats()
    filter_and_append_stats()
    df_statsdata_3mo = df_statsdata.copy()
# This module grabs each ticker file, transforms it and calculates the statistics needed for a 60 day holding period.
def calc_2month_returns():
    """Run the full transform/stats pipeline for a 2-month (40 trading day) hold."""
    global dfr, dfr_pivot, df_tradelist, dfr_2mo, df_statsdata_2mo
    global threshold, hold_per, dperiods, interval
    # Pipeline parameters for this holding period.
    hold_per = "2 Mos"
    dperiods = 40   # trading days per holding period
    interval = 60   # calendar-day offset used to back out the buy date
    # Transform the price history into a pivoted seasonal-returns table.
    convert_prices_to_periods()
    separate_date_column()
    pivot_the_table()
    add_calculated_items()
    sortbydate_resetindex_export()
    dfr_2mo = pd.DataFrame(dfr_pivot)
    # Extract the stats and stash qualifying tickers on the trade list.
    calc_trading_stats()
    filter_and_append_stats()
    df_statsdata_2mo = df_statsdata.copy()
# This module grabs each ticker file, transforms it and calculates the statistics needed for a 30 day holding period.
def calc_1month_returns():
    """Run the full transform/stats pipeline for a 1-month (20 trading day) hold."""
    global dfr, dfr_pivot, df_tradelist, dfr_1mo, df_statsdata_1mo
    global threshold, hold_per, dperiods, interval
    # Pipeline parameters for this holding period.
    hold_per = "1 Mo"
    dperiods = 20   # trading days per holding period
    interval = 30   # calendar-day offset used to back out the buy date
    # Transform the price history into a pivoted seasonal-returns table.
    convert_prices_to_periods()
    separate_date_column()
    pivot_the_table()
    add_calculated_items()
    sortbydate_resetindex_export()
    dfr_1mo = pd.DataFrame(dfr_pivot)
    # Extract the stats and stash qualifying tickers on the trade list.
    calc_trading_stats()
    filter_and_append_stats()
    df_statsdata_1mo = df_statsdata.copy()
# Build and export an Excel file for each ticker using XlsxWriter
def export_to_excel():
    """Write the current ticker's stats and monthly return tables to <ticker>.xlsx.

    Produces a "Stats" sheet with the three holding-period rows stacked, plus
    one sheet per holding period with positive returns highlighted in green.
    """
    excel_file_path = my_path + "/data/" + my_ticker + ".xlsx"
    # BUG FIX: ExcelWriter.save() was removed in pandas 2.0; the context
    # manager closes (and thereby saves) the workbook instead.
    with pd.ExcelWriter(excel_file_path, engine="xlsxwriter") as writer:
        # Stack the three holding-period stat rows on one sheet.
        df_statsdata_1mo.to_excel(writer, sheet_name="Stats", index=False)
        df_statsdata_2mo.to_excel(
            writer, sheet_name="Stats", startrow=2, header=False, index=False
        )
        df_statsdata_3mo.to_excel(
            writer, sheet_name="Stats", startrow=3, header=False, index=False
        )
        dfr_1mo.to_excel(writer, sheet_name="1 Mo Returns", index=False)
        dfr_2mo.to_excel(writer, sheet_name="2 Mo Returns", index=False)
        dfr_3mo.to_excel(writer, sheet_name="3 Mo Returns", index=False)
        # Get the xlsxwriter objects from the dataframe writer object.
        workbook = writer.book
        worksheet2 = writer.sheets["1 Mo Returns"]
        worksheet3 = writer.sheets["2 Mo Returns"]
        worksheet4 = writer.sheets["3 Mo Returns"]
        # Conditional formatting: highlight positive returns in green across
        # the year columns (everything left of YearCount).
        end_column = dfr_1mo.columns.get_loc("YearCount")
        grn_format = workbook.add_format(
            {"bg_color": "#C6EFCE", "font_color": "#006100"}
        )
        positive_rule = {
            "type": "cell",
            "criteria": ">",
            "value": 0,
            "format": grn_format,
        }
        for worksheet in (worksheet2, worksheet3, worksheet4):
            worksheet.conditional_format(1, 2, 365, end_column - 1, positive_rule)
            # Keep the header row and date columns visible while scrolling.
            worksheet.freeze_panes(1, 2)
# Read CSV files by ticker, transform and extract stats from each one.
for index, ticker in df_sp500_tickers.iterrows():
    my_ticker = ticker["Symbol"]
    df = pd.read_csv(my_path + "/data/" + my_ticker + ".csv")
    df.set_index("Date", inplace=True)
    # Keep only the Close column as a one-column DataFrame.
    df = pd.DataFrame(df["Close"], columns=["Close"])
    calc_1month_returns()
    calc_2month_returns()
    calc_3month_returns()
    export_to_excel()

# Keep a copy of the raw trade list before cleaning.
df_tradelist_copy = df_tradelist.copy()

# Clean it up by removing rows with NaN's and infinity values and dropping duplicates.
df_tradelist.replace("inf", np.nan, inplace=True)
df_tradelist.dropna(inplace=True)
# The stats were stored through a string ndarray, so missing numerics appear
# as the literal string "nan".
df_tradelist = df_tradelist[~df_tradelist.max_up_return.str.contains("nan")]
df_tradelist = df_tradelist[~df_tradelist.avg_down_return.str.contains("nan")]
# BUG FIX: the sort result was previously discarded (no inplace/reassignment),
# so drop_duplicates(keep="first") kept an arbitrary row.  Sorting in place
# makes it retain the highest-pct_uprows row per ticker as intended.
df_tradelist.sort_values(by=["pct_uprows"], ascending=False, inplace=True)
df_tradelist.drop_duplicates(subset="my_ticker", keep="first", inplace=True)

# Export the trade list to CSV files for execution and/or further research if desired.
df_tradelist.to_csv(path_or_buf=my_path + "/df_tradelist.csv", index=False)