import warnings
warnings.filterwarnings('ignore')
import yfinance as yf
import cufflinks as cf
cf.set_config_file(offline = True)
import numpy as np
import pandas as pd
# Show the documentation for yf.download. The builtin help() is used instead
# of IPython's trailing-`?` syntax so this line is also valid plain Python.
help(yf.download)

# SPY data for the period '5d'.
df1 = yf.download('SPY', period='5d', progress=False)
df1

# SPY data for an explicit start/end date window.
df2 = yf.download('SPY', start='2020-12-24', end='2020-12-31', progress=False)
df2

# SPY data for the period 'ytd' (year to date).
df3 = yf.download('SPY', period='ytd', progress=False)
df3.tail()

# AMZN data for the period '5d' at a '1m' interval.
df4 = yf.download('AMZN', period='5d', interval='1m', progress=False)
df4.tail()
# Fetch the SPY option chain for one expiry and keep only the call options
# whose strike is above 400, renumbering the rows from zero.
# NOTE(review): the expiry date is hard-coded; presumably it has to be one of
# the expiries currently listed for the ticker -- confirm before re-running.
spy = yf.Ticker('SPY')
options = spy.option_chain('2021-11-19')  # expiry date: 2021-11-19
call = (
    options.calls
    .loc[options.calls['strike'] > 400]
    .reset_index(drop=True)
)
call.head()
faang_stocks = ['AAPL', 'AMZN', 'FB', 'GOOG', 'NFLX']

# A single download call for all symbols; only the 'Adj Close' part is kept.
dfm1 = yf.download(faang_stocks, progress=False, period='250d')['Adj Close']
dfm1.tail()

# Warm-up for the mapping built next: one entry per symbol, each mapped to
# the placeholder value 1.
a = dict.fromkeys(faang_stocks, 1)
a

# Same shape of mapping, but each symbol now maps to its own download result
# instead of the placeholder.
dfm2 = {
    ticker: yf.download(ticker, progress=False, period='250d')
    for ticker in faang_stocks
}
dfm2['AAPL']
from pandas import ExcelWriter

# Store the fetched data in a separate sheet for each security.
# Assumes a folder called 'data' already exists.
# The writer is used as a context manager so the workbook is saved and closed
# when the block exits; the explicit ExcelWriter.save() call is no longer
# available in pandas 2.x. The sheet name is passed by keyword because the
# positional form is deprecated.
with ExcelWriter('data/mystocks.xlsx') as writer:
    for symbol in faang_stocks:
        dfm2[symbol].to_excel(writer, sheet_name=symbol)

# Read one sheet back, using its first column as a parsed date index.
faang = pd.read_excel('data/mystocks.xlsx', sheet_name='NFLX', index_col=0, parse_dates=True)
faang

# Also write each security to its own CSV file in the same folder.
for symbol in faang_stocks:
    dfm2[symbol].to_csv(f'data/{symbol}.csv')
print('Done')
# pd.read_html returns one DataFrame per HTML table found on the page, so
# sp500[0] and sp500[1] hold the tables from
# https://en.wikipedia.org/wiki/List_of_S%26P_500_companies
# The first table (sp500[0]) has a column named 'Symbol'.
sp500 = pd.read_html(
    'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
)
sp500[0]['Symbol']

# Turn the 'Symbol' column (a pandas Series) into a plain Python list and
# peek at its first ten entries.
stockslist = sp500[0]['Symbol'].tolist()
stockslist[:10]
# When we import the cufflinks library, all pandas DataFrame and Series objects
# gain a new method, .iplot(), similar to pandas' own .plot() method.
# Convert the index with pd.to_datetime before plotting.
df3.index = pd.to_datetime(df3.index)
df3.index

# Line chart of the 'Adj Close' column.
df3['Adj Close'].iplot(title='SPY Year to Date Adjusted Close', kind='line')

# OHLC chart of the last 30 rows (kind='candle' is the alternative chart
# type noted for this plot).
df3.iloc[-30:].iplot(title='SPY OHLC Plot', kind='ohlc')

# Amazon vs Apple on one chart, with AAPL on the secondary (right-hand) axis.
dfm1[['AMZN', 'AAPL']].iplot(secondary_y='AAPL', title='Amazon Vs Apple')

# The same two columns again, this time drawn as subplots.
dfm1[['AMZN', 'AAPL']].iplot(subplots=True, title='Amazon Vs Apple')

# All columns of dfm1 after normalize(), on a single chart.
dfm1.normalize().iplot(title='FAANG Stocks')
spy = yf.download('SPY', start='2015-01-02', end='2020-12-31', progress=False)

# Distinct calendar years present in the index.
years = spy.index.year.unique()
years

# One column per year holding that year's 'Adj Close' values, renumbered from
# zero so that row i is the i-th row of each year.
# The frame is built from a dict of Series rather than by assigning columns
# one at a time to an empty DataFrame: column assignment fixes the row index
# to the first year's length and silently drops the extra rows of any longer
# later year, whereas the constructor uses the union of all row labels and
# pads shorter years with NaN.
newdf = pd.DataFrame({
    year: spy.loc[spy.index.year == year, 'Adj Close'].reset_index(drop=True)
    for year in years
})
newdf.tail()
# Demonstration: when Series are added as columns to an empty DataFrame, the
# first Series added fixes the row index. The longest Series therefore has to
# go in first; otherwise every later, longer Series is truncated to fit.
s1 = pd.Series(dict(zip('ABCDEFGHI', range(1, 10))))
s2 = pd.Series([4, 5])

# Long series first: all nine rows survive and the short column is padded.
anotherdf1 = pd.DataFrame()
for position, series in enumerate((s1, s2)):
    anotherdf1[position] = series.reset_index(drop=True)
display(anotherdf1)

# Short series first: only two rows exist, so the long column is cut down.
anotherdf2 = pd.DataFrame()
for position, series in enumerate((s2, s1)):
    anotherdf2[position] = series.reset_index(drop=True)
display(anotherdf2)
# Forward fill carries the last available value down into the missing rows
# that follow it (e.g. Friday's value fills a gap after it). Backward fill
# would instead pull the next available value up (e.g. Monday's value fills
# a missing Friday).
newdff = newdf.ffill()
newdff.tail()

# Summary statistics, transposed so each original column becomes a row.
newdff.describe().transpose()

# Box plot of every column, with outlier points shown and the legend hidden.
newdff.iplot(
    kind='box',
    title='SPY Box Plot',
    yTitle='Adj Close',
    boxpoints='outliers',
    legend=False,
)