Import Statements

In [1]:
import warnings
warnings.filterwarnings('ignore')
import yfinance as yf
import cufflinks as cf
cf.set_config_file(offline = True)
import numpy as np
import pandas as pd

How to get help

In [2]:
yf.download?

Downloading Single Stock Data from Yahoo

5 Days

In [3]:
df1 = yf.download('SPY', period='5d', progress=False)
df1
Out[3]:
Open High Low Close Adj Close Volume
Date
2021-10-25 454.279999 455.899994 452.390015 455.549988 455.549988 45214500
2021-10-26 457.200012 458.489990 455.559998 455.959991 455.959991 56075100
2021-10-27 456.450012 457.160004 453.859985 453.940002 453.940002 72438000
2021-10-28 455.459991 458.399994 455.450012 458.320007 458.320007 51437900
2021-10-29 455.869995 459.559998 455.559998 459.250000 459.250000 70108200

Specify Dates

In [4]:
df2 = yf.download('SPY', start='2020-12-24', end = '2020-12-31', progress=False)
df2
Out[4]:
Open High Low Close Adj Close Volume
Date
2020-12-23 368.279999 369.619995 367.220001 367.570007 364.009216 46201400
2020-12-24 368.079987 369.029999 367.450012 369.000000 365.425323 26457900
2020-12-28 371.739990 372.589996 371.070007 372.170013 368.564636 39000400
2020-12-29 373.809998 374.000000 370.829987 371.459991 367.861481 53680500
2020-12-30 372.339996 373.100006 371.570007 371.989990 368.386353 49455300

Year to Date

In [5]:
df3 = yf.download('SPY', period='ytd', progress = False)
df3.tail()
Out[5]:
Open High Low Close Adj Close Volume
Date
2021-10-25 454.279999 455.899994 452.390015 455.549988 455.549988 45214500
2021-10-26 457.200012 458.489990 455.559998 455.959991 455.959991 56075100
2021-10-27 456.450012 457.160004 453.859985 453.940002 453.940002 72438000
2021-10-28 455.459991 458.399994 455.450012 458.320007 458.320007 51437900
2021-10-29 455.869995 459.559998 455.559998 459.250000 459.250000 70108200

Intraday (Up to 60 days)

In [6]:
df4 = yf.download('AMZN', period = '5d', interval = '1m', progress = False)
df4.tail()
Out[6]:
Open High Low Close Adj Close Volume
Datetime
2021-10-29 15:55:00-04:00 3366.300049 3369.580078 3361.959961 3366.074951 3366.074951 42259
2021-10-29 15:56:00-04:00 3366.419922 3366.419922 3362.800049 3365.639893 3365.639893 23804
2021-10-29 15:57:00-04:00 3365.449951 3368.000000 3365.250000 3367.495117 3367.495117 31584
2021-10-29 15:58:00-04:00 3367.159912 3370.989990 3366.510010 3370.290039 3370.290039 41189
2021-10-29 15:59:00-04:00 3371.185059 3374.820068 3370.474365 3372.429932 3372.429932 77840

Option Chain

In [7]:
spy = yf.Ticker('SPY')
options = spy.option_chain('2021-11-19') #Expiry date = 2021 - 11 - 19
In [8]:
# Filter call option for strike above 400
call = options.calls[options.calls['strike']>400]
call.reset_index(drop=True, inplace=True)
call.head()
Out[8]:
contractSymbol lastTradeDate strike lastPrice bid ask change percentChange volume openInterest impliedVolatility inTheMoney contractSize currency
0 SPY211119C00401000 2021-10-22 15:39:29 401.0 51.25 58.66 59.03 0.000000 0.000000 5.0 148 0.347419 True REGULAR USD
1 SPY211119C00402000 2021-10-20 17:11:01 402.0 50.67 57.67 58.04 0.000000 0.000000 14.0 150 0.343146 True REGULAR USD
2 SPY211119C00403000 2021-10-22 15:08:30 403.0 53.64 56.68 57.05 0.000000 0.000000 6.0 300 0.338874 True REGULAR USD
3 SPY211119C00404000 2021-10-29 15:28:44 404.0 54.59 55.69 56.07 6.599998 13.752861 1.0 59 0.335456 True REGULAR USD
4 SPY211119C00405000 2021-10-27 16:00:46 405.0 53.50 54.70 55.08 2.060001 4.004668 5.0 403 0.331061 True REGULAR USD

Downloading Multiple Stocks Data

Getting Adjusted Close

In [9]:
faang_stocks = ['AAPL', 'AMZN', 'FB', 'GOOG', 'NFLX']

dfm1 = yf.download(faang_stocks, period = '250d', progress=False)['Adj Close']
dfm1.tail()
Out[9]:
AAPL AMZN FB GOOG NFLX
Date
2021-10-25 148.639999 3320.370117 328.690002 2775.459961 671.659973
2021-10-26 149.320007 3376.070068 315.809998 2793.439941 668.520020
2021-10-27 148.850006 3392.489990 312.220001 2928.550049 662.919983
2021-10-28 152.570007 3446.570068 316.920013 2922.580078 674.049988
2021-10-29 149.800003 3372.429932 323.570007 2965.409912 690.309998

Getting OHLCV

In [10]:
# Demonstrating how the code below works

a = {symbol:1 for symbol in faang_stocks}
a
Out[10]:
{'AAPL': 1, 'AMZN': 1, 'FB': 1, 'GOOG': 1, 'NFLX': 1}
In [11]:
# This works similar to the code above, but 1 is replaced with 
# yf.download(symbol, period='250d', progress = False)

dfm2 = {symbol:yf.download(symbol, period='250d', progress = False) for symbol in faang_stocks}
In [12]:
dfm2['AAPL']
Out[12]:
Open High Low Close Adj Close Volume
Date
2020-11-03 109.660004 111.489998 108.730003 110.440002 109.734215 107624400
2020-11-04 114.139999 115.589996 112.349998 114.949997 114.215378 138235500
2020-11-05 117.949997 119.620003 116.870003 119.029999 118.269310 126387100
2020-11-06 118.320000 119.199997 116.129997 118.690002 118.134949 114457900
2020-11-09 120.500000 121.989998 116.050003 116.320000 115.776031 154515300
... ... ... ... ... ... ...
2021-10-25 148.679993 149.369995 147.619995 148.639999 148.639999 50720600
2021-10-26 149.330002 150.839996 149.009995 149.320007 149.320007 60893400
2021-10-27 149.360001 149.729996 148.490005 148.850006 148.850006 56094900
2021-10-28 149.820007 153.169998 149.720001 152.570007 152.570007 100077900
2021-10-29 147.220001 149.940002 146.410004 149.800003 149.800003 124850400

250 rows × 6 columns

Working with Excel

Export to Excel

In [13]:
from pandas import ExcelWriter
In [14]:
# Store the fetched data in a separate SHEET for each security
# Assume there is a folder called 'data'

writer = ExcelWriter('data/mystocks.xlsx')
[pd.DataFrame(dfm2[symbol]).to_excel(writer,symbol) for symbol in faang_stocks]
writer.save()

Read from Excel

In [15]:
faang = pd.read_excel('data/mystocks.xlsx', sheet_name='NFLX', index_col=0, parse_dates=True)
faang
Out[15]:
Open High Low Close Adj Close Volume
Date
2020-11-03 484.929993 495.309998 478.760010 487.220001 487.220001 3690200
2020-11-04 495.359985 507.730011 493.980011 496.950012 496.950012 5137300
2020-11-05 506.559998 518.729980 503.450012 513.760010 513.760010 5372800
2020-11-06 515.000000 515.210022 502.510010 514.729980 514.729980 4236300
2020-11-09 485.540009 495.850006 467.260010 470.500000 470.500000 10419700
... ... ... ... ... ... ...
2021-10-25 663.739990 675.880005 657.070007 671.659973 671.659973 3833500
2021-10-26 673.760010 676.489990 662.770020 668.520020 668.520020 2904800
2021-10-27 669.000000 671.409973 661.849976 662.919983 662.919983 2276900
2021-10-28 670.950012 676.799988 668.030029 674.049988 674.049988 2859400
2021-10-29 673.059998 690.969971 671.239990 690.309998 690.309998 3817500

250 rows × 6 columns

Working with CSV

Export to CSV

In [16]:
[pd.DataFrame(dfm2[symbol]).to_csv('data/'+symbol+'.csv') for symbol in faang_stocks]
print('Done')
Done

Working with HTML

Read from HTML

In [17]:
# Reads the two tables from https://en.wikipedia.org/wiki/List_of_S%26P_500_companies
# into sp500[0] and sp500[1] respectively
# The first table (sp500[0]) has a column named 'Symbol'

sp500 = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
sp500[0]['Symbol']
Out[17]:
0       MMM
1       ABT
2      ABBV
3      ABMD
4       ACN
       ... 
500     YUM
501    ZBRA
502     ZBH
503    ZION
504     ZTS
Name: Symbol, Length: 505, dtype: object
In [18]:
# Convert the pandas Series to a Python list

stockslist = list(sp500[0]['Symbol'])
stockslist[:10]
Out[18]:
['MMM', 'ABT', 'ABBV', 'ABMD', 'ACN', 'ATVI', 'ADBE', 'AMD', 'AAP', 'AES']

Interactive Visualization of Time Series

Plotting a single series

When we import the cufflibnks library, all pandas dataframes and series objects have a new method .iplot() attached to them (similar to the pandas' .plot() method)

In [19]:
df3.index = pd.to_datetime(df3.index)
df3.index
Out[19]:
DatetimeIndex(['2021-01-04', '2021-01-05', '2021-01-06', '2021-01-07',
               '2021-01-08', '2021-01-11', '2021-01-12', '2021-01-13',
               '2021-01-14', '2021-01-15',
               ...
               '2021-10-18', '2021-10-19', '2021-10-20', '2021-10-21',
               '2021-10-22', '2021-10-25', '2021-10-26', '2021-10-27',
               '2021-10-28', '2021-10-29'],
              dtype='datetime64[ns]', name='Date', length=209, freq=None)
In [20]:
df3['Adj Close'].iplot(kind='line', title='SPY Year to Date Adjusted Close')
In [21]:
df3[-30:].iplot(kind='ohlc', title = 'SPY OHLC Plot')
# df3[-30:].iplot(kind='candle', title = 'SPY OHLC Plot')

Plotting multiple series

In [22]:
# Using secondary axis (AAPL price will be displayed on the right)

dfm1[['AMZN', 'AAPL']].iplot(title = 'Amazon Vs Apple', secondary_y = 'AAPL')
In [23]:
# Using Subplots

dfm1[['AMZN', 'AAPL']].iplot(title = 'Amazon Vs Apple', subplots = True)

Normalized Plot

In [24]:
dfm1.normalize().iplot(title = 'FAANG Stocks')

Box Plot

In [25]:
spy = yf.download('SPY', start='2015-01-02', end = '2020-12-31', progress=False)
In [26]:
years = spy.index.year.unique()
years
Out[26]:
Int64Index([2015, 2016, 2017, 2018, 2019, 2020], dtype='int64', name='Date')
In [27]:
newdf = pd.DataFrame()

for year in years:
    newdf[year] = pd.Series(spy[spy.index.year == year]['Adj Close']).reset_index(drop=True)

newdf.tail()
Out[27]:
2015 2016 2017 2018 2019 2020
247 184.935547 207.303436 250.171707 234.878174 312.286224 364.009186
248 184.512985 207.817764 250.293350 236.681427 313.948578 365.425323
249 186.482040 206.100266 250.808365 236.376083 313.870819 368.564636
250 185.160309 206.054352 249.862701 238.446487 312.140381 367.861511
251 183.308121 205.301254 NaN NaN 312.898651 368.386383
In [28]:
# Code to demonstrate that when you add two series to a Dataframe, the first series should be the longest one.
# Else the subsequent series will be truncated. 

anotherdf1 = pd.DataFrame()
anotherdf2 = pd.DataFrame()

s1 = pd.Series({'A':1, 'B': 2, 'C': 3, 'D': 4, 'E': 5, 'F': 6, 'G': 7, 'H': 8, 'I':9})
s2 = pd.Series([4, 5])

anotherdf1[0] = s1.reset_index(drop=True)
anotherdf1[1] = s2.reset_index(drop=True)

display(anotherdf1)

anotherdf2[0] = s2.reset_index(drop=True)
anotherdf2[1] = s1.reset_index(drop=True)

display(anotherdf2)
0 1
0 1 4.0
1 2 5.0
2 3 NaN
3 4 NaN
4 5 NaN
5 6 NaN
6 7 NaN
7 8 NaN
8 9 NaN
0 1
0 4 1
1 5 2
In [29]:
# Fill Forward - Use Friday's data to fill today's missing value
# Fill Backward - Use Monday's data to fill Friday's missing value

newdff = newdf.ffill(axis = 0)
In [30]:
newdff.tail()
Out[30]:
2015 2016 2017 2018 2019 2020
247 184.935547 207.303436 250.171707 234.878174 312.286224 364.009186
248 184.512985 207.817764 250.293350 236.681427 313.948578 365.425323
249 186.482040 206.100266 250.808365 236.376083 313.870819 368.564636
250 185.160309 206.054352 249.862701 238.446487 312.140381 367.861511
251 183.308121 205.301254 249.862701 238.446487 312.898651 368.386383
In [31]:
newdff.describe().T
Out[31]:
count mean std min 25% 50% 75% max
2015 252.0 183.070148 4.719589 166.520432 180.793709 184.818298 186.488888 189.035019
2016 252.0 189.998624 10.193114 164.417114 184.795841 190.860931 197.377594 207.960541
2017 252.0 226.644708 11.406554 206.871735 217.571728 225.245232 235.173141 251.117310
2018 252.0 258.594767 9.651974 223.581741 252.311096 257.903671 265.159851 277.220886
2019 252.0 279.808099 15.834313 232.998627 269.750114 280.123596 289.129562 313.948578
2020 252.0 314.750216 32.171057 218.017075 296.677284 319.134964 334.766808 368.564636
In [32]:
newdff.iplot(kind='box', title='SPY Box Plot', yTitle = 'Adj Close', legend = False, boxpoints = 'outliers')