import pandas as pd
import altair as alt
import numpy as np
import re
# Imports for reading FRED API
from os import environ
from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen
try:
# for local execution
apiKeyFromFile = open("/Users/kyledunn/fredApiKey.txt", "r").read().strip()
except FileNotFoundError:
apiKeyFromFile = None
pass
# for CI
apiKey = environ.get("FRED_API_KEY", apiKeyFromFile)
def getSeries(series="", apiKey=apiKey, description=None):
# Construct URL template, fetching as much data as possible
fetchCommand = "https://api.stlouisfed.org/fred/series/observations?series_id={s}&realtime_end=9999-12-31&api_key={k}&file_type=txt"
# Call the FRED API
resp = urlopen(fetchCommand.format(s=series, k=apiKey))
# Read and extract the data from the Zipfile response
zipfile = ZipFile(BytesIO(resp.read()))
filesInZip = zipfile.namelist()
data = zipfile.open(filesInZip[1])
if description is None:
description = series
# Make a well-formed dataframe
df = pd.read_csv(data, sep="\t", header=None, skiprows=1,
names=["date", description, "rt_start", "rt_end"], na_values=".")
df['date'] = pd.to_datetime(df.date)
return df.set_index("date")
df_cpi = getSeries("CWUR0000SA0")
descriptionForCode = {
'441': 'Motor vehicle and parts dealers',
'4411,4412': 'Automobile and other motor vehicle dealers',
'4411': 'Automobile dealers',
'44111': 'New car dealers',
'44112': 'Used car dealers',
'4413': 'Automotive parts, acc., and tire stores',
'442,443': 'Furniture, home furn, electronics, and appliance stores',
'442': 'Furniture and home furnishings stores',
'4421': 'Furniture stores',
'4422': 'Home furnishings stores',
'44221': 'Floor covering stores',
'442299': 'All other home furnishings stores',
'443': 'Electronics and appliance stores',
'443141': 'Household appliance stores',
'443142': 'Electronics stores',
'444': 'Building mat. and garden equip. and supplies dealers',
'4441': 'Building mat. and supplies dealers',
'44412': 'Paint and wallpaper stores',
'44413': 'Hardware stores',
'445': 'Food and beverage stores',
'4451': 'Grocery stores',
'44511': 'Supermarkets and other grocery (except convenience) stores',
'4453': 'Beer, wine, and liquor stores',
'446': 'Health and personal care stores',
'44611': 'Pharmacies and drug stores',
'447': 'Gasoline stations',
'448': 'Clothing and clothing access. stores',
'4481': 'Clothing stores',
'44811': "Men's clothing stores",
'44812': "Women's clothing stores",
'44814': 'Family clothing stores',
'44819': 'Other clothing stores',
'4482': 'Shoe stores',
'44831': 'Jewelry stores',
'451': 'Sporting goods, hobby, musical instrument, and book stores',
'45111': 'Sporting goods stores',
'45112': 'Hobby, toy, and game stores',
'451211': 'Book stores',
'452': 'General merchandise stores',
'4521': 'Department stores',
'452111': 'Department stores(excl. discount department stores)',
'452112': 'Discount dept. stores',
'4529': 'Other general merchandise stores',
'45291': 'Warehouse clubs and superstores',
'45299': 'All other gen. merchandise stores',
'453': 'Miscellaneous store retailers',
'4532': 'Office supplies, stationery, and gift stores',
'45321': 'Office supplies and stationery stores',
'45322': 'Gift, novelty, and souvenir stores',
'45330': 'Used merchandise stores',
'454': 'Nonstore retailers',
'4541': 'Electronic shopping and mail-order houses',
'45431': 'Fuel dealers',
'722': 'Food services and drinking places',
'7224': 'Drinking places(3)',
'7225': 'Restaurants and other eating places',
'722511': 'Full service restaurants',
'722513, 722514, 722515': 'Limited service eating places',
'441': 'Motor vehicle and parts dealers',
'4411,4412': 'Automobile and other motor vehicle dealers',
'4413': 'Automotive parts, acc., and tire stores',
'442,443': 'Furniture, home furn, electronics, and appliance stores',
'442': 'Furniture and home furnishings stores',
'443': 'Electronics and appliance stores',
'444': 'Building mat. and garden equip. and supplies dealers',
'4441': 'Building mat. and supplies dealers',
'445': 'Food and beverage stores',
'4451': 'Grocery stores',
'4453': 'Beer, wine and liquor stores',
'446': 'Health and personal care stores',
'44611': 'Pharmacies and drug stores',
'447': 'Gasoline stations',
'448': 'Clothing and clothing access. stores',
'4481': 'Clothing stores',
'44811': "Men's clothing stores",
'44812': "Women's clothing stores",
'4482': 'Shoe stores',
'44831': 'Jewelry stores',
'451': 'Sporting goods, hobby, musical instrument, and book stores',
'452': 'General merchandise stores',
'4521': 'Department stores',
'4529': 'Other general merchandise stores',
'45291': 'Warehouse clubs and superstores',
'45299': 'All other gen. merchandise stores',
'453': 'Miscellaneous stores retailers',
'454': 'Nonstore retailers',
'4541': 'Electronic shopping and mail order houses',
'45431': 'Fuel dealers',
'722': 'Food services and drinking places',
}
dfa = pd.read_excel('https://www.census.gov/retail/marts/www/marts_current.xls', skiprows=7)
month = dfa.columns[4].split('.')[0]
year = dfa.iloc[0, 2]
advanceReleaseMonthYear = "{}. {}".format(re.sub("[^A-z]", "", month), year)
#print(advanceReleaseMonthYear)
dfa.columns = ['NAICS', 'Description', f'{year} Total', f'{year} Change',
advanceReleaseMonthYear] + dfa.columns.to_list()[5:]
dfa = dfa.iloc[3:] #.dropna()
# Fill down some codes where label spans two rows
dfa.loc[23, 'NAICS'] = dfa.loc[22, 'NAICS']
dfa.loc[36, 'NAICS'] = dfa.loc[35, 'NAICS']
dfa.loc[44, 'NAICS'] = dfa.loc[43, 'NAICS']
dfa.loc[3, 'Description'] = 'Retail and food services sales, total'
dfa.loc[4, 'Description'] = 'Retail sales and food services excl motor vehicle and parts'
dfa.loc[5, 'Description'] = 'Retail sales and food services excl gasoline stations'
dfa.loc[7, 'Description'] = 'Retail sales and food services excl motor vehicle and parts and gasoline stations'
dfa.loc[8, 'Description'] = 'Retail sales, total'
# Clean up records without a numeric value in the column of interest
dfa = dfa[~dfa.iloc[:, 4].isnull() & ~dfa.iloc[:, 4].isin(['(*)'])]
dfa.loc[11:, 'Description'] = dfa.loc[11:, 'NAICS'].map(lambda v: descriptionForCode.get(str(v).replace(" ", "")))
df_current = dfa[['Description', advanceReleaseMonthYear]].set_index('Description').apply(pd.to_numeric).T
#dfa.head(50)
# Encode the monthly retail sales column names
yearColumns = lambda y : map(lambda s: s.format(y), ['Jan. {}', 'Feb. {}', 'Mar. {}', 'Apr. {}', 'May. {}', 'Jun. {}', 'Jul. {}', 'Aug. {}', 'Sep. {}', 'Oct. {}', 'Nov. {}', 'Dec. {}', 'Total {}'])
standardColumns = lambda y : ['NAICS', 'Description'] + list(yearColumns(y))
currentYearColumns = lambda y: [s.format(y) for s in ['{} CUM', '{} PY CUM']]
# Use the month of the advance release to find the newest column in monthly historical
advanceReleaseIndex = list(yearColumns(year)).index(advanceReleaseMonthYear)
# Note, need to add to 2020 as new data is released
# TODO, try to automate this better
oneOffYears = {
'2022': standardColumns('2022')[:2] + list(yearColumns('2022'))[:advanceReleaseIndex] + currentYearColumns('2022'),
'2020': standardColumns('2020') + ['IGNORE20'],
'2016': standardColumns('2016') + ['IGNORE16']
}
# Fetch the historical monthly retail sales data
# set sheet_name to none to read all sheets in the XLS
dfs = pd.read_excel('https://www.census.gov/retail/mrts/www/mrtssales92-present.xls', sheet_name=None)
# parse the results sheet-by-sheet, normalizing the columns names and index
allDfs = []
for y in dfs.keys():
#print(y)
#print(dfs[y].iloc[:6])
try:
dfs[y].columns = oneOffYears.get(y) or standardColumns(y)
except ValueError:
dfs[y].columns = standardColumns(y)
dfs[y] = dfs[y].drop('NAICS', axis=1)
#print(dfs[y].iloc[5:109].set_index('Description').index.is_unique)
allDfs.append(dfs[y].iloc[5:109].set_index('Description'))
# workaround for merging the sheets into a single dataframe
old = pd.concat(allDfs[3:], axis=1)
new = pd.concat(allDfs[:3], axis=1)
combined = new.join(old).drop('ADJUSTED(2)').T
# Use the set of columns to reshape the current report data
currentAligned = pd.DataFrame.from_records({ c: None if df_current.get(c) is None else df_current.get(c).values[0] for c in combined.columns },
index=[pd.to_datetime(advanceReleaseMonthYear, format='%b. %Y')])
currentAligned = currentAligned.rename_axis('dt', axis=0)
#combined.head()
# subset the combined dataset to the monthly report records
tsNormalized = combined[combined.index.map(lambda v: re.search('[A-z]{3}.? \d{4}$', v) is not None and 'Total' not in v)].copy()
# create a proper datetime column
# note: (p) "preliminary" records lose their designation and are treated the same as final records
tsNormalized['dt'] = tsNormalized.index.map(lambda v: pd.to_datetime(v.replace('(p)', ''), format='%b. %Y')
if '.' in v else pd.to_datetime(v, format='%b %Y'))
# reindex the dataframe on the datetime field and ensure all values are numeric
dt = tsNormalized.set_index('dt')
dt = dt.apply(pd.to_numeric, errors='coerce')
#dt.head()
# Use the most common value when multiple samples are available
def mode(x):
m = pd.Series.mode(x);
return m.values[0] if not m.empty else np.nan
# Melt and re-aggregate to combine duplicate categories
df_melted = dt.reset_index().melt(id_vars='dt')
fix = df_melted.Description == "Beer, wine and liquor stores"
df_melted.loc[fix, 'Description'] = 'Beer, wine, and liquor stores'
fix2 = df_melted.Description == "Men's clothing stores"
df_melted.loc[fix2, 'Description'] = 'Mens clothing stores'
fix3 = df_melted.Description == "Women's clothing stores"
df_melted.loc[fix3, 'Description'] = 'Womens clothing stores'
fix4 = df_melted.Description == 'Retail sales, total (excl. motor vehicle and parts dealers)'
df_melted.loc[fix4, 'Description'] = 'Retail sales and food services excl motor vehicle and parts'
df_agg = df_melted.groupby(['dt', 'Description']).agg(mode).reset_index('Description')
#df_agg.head()
# Reshape the dataframe so each monhtly report is a row and each category is a column
df_combined = df_agg.pivot(columns='Description')
df_combined.columns = df_combined.columns.droplevel()
# Merge the current and historical reports
df_combined = pd.concat([currentAligned, df_combined]).sort_index()
#df_combined.tail()
def doLineChartFor(metric, df, yLabel='Revenue [Million USD]', color='purple'):
return alt.Chart(df.reset_index()[['dt', metric]].dropna()).mark_line(color=color).encode(
alt.X('dt:T', axis=alt.Axis(title='')),
alt.Y(f'{metric}:Q', axis=alt.Axis(title=yLabel)),
tooltip=[alt.Tooltip('dt:T'), alt.Tooltip(f'{metric}:Q', title='$', format=",.0f")]
).properties(
title=f'US: {metric} - over time',
height=500,
width=750
)
doLineChartFor('Furniture, home furn, electronics, and appliance stores', df_combined)
def doYoYChartFor(metric, df, color='royalblue'):
yoy = df[[metric]].pct_change(12).apply(lambda v: v*100.).sort_index()
return alt.Chart(yoy[-160:].reset_index()).mark_bar(size=2.5).transform_filter(f"datum['{metric}'] < 100").encode(
alt.X('dt:T', axis=alt.Axis(title='')),
alt.Y(f'{metric}:Q', axis=alt.Axis(title='Year over Year Revenue Growth [%]'.format(metric))),
color=alt.condition(f"datum['{metric}'] < 0",
alt.value('tomato'),
alt.value(color)
),
tooltip=[alt.Tooltip('dt:T'), alt.Tooltip(f'{metric}:Q', title='YoY Change [%]', format=',.02f')]
).properties(
title=f'US: {metric} - growth over time',
height=500,
width=750
)
def doComboChartFor(metric, df, yLabel='Revenue [Million USD]', color='royalblue', background=True):
pmp = df[metric].sort_index().copy().reset_index().dropna()
brush = alt.selection(type='interval', encodings=['x'])
base = alt.Chart(pmp).mark_line(color='purple').encode(
alt.X('dt:T', axis=alt.Axis(title='', format='%b %Y')),
alt.Y(f'{metric}:Q', axis=alt.Axis(title=yLabel)),
tooltip=[alt.Tooltip("dt:T", format="%b %Y"), alt.Tooltip(f"yoy:Q", format=",.02f")]
).properties(
width=750,
height=450
)
upper = base.mark_bar(color=color, size=2.5).transform_window(
window=[alt.WindowFieldDef("lag", metric, param=12, **{ 'as': 'previous' })],
sort=[{"field": "dt"}],
).transform_calculate(
yoy=f"((datum['{metric}'] - datum.previous) / datum.previous) * 100"
).transform_filter("datum['yoy'] < 100").encode(
alt.X('dt:T', axis=alt.Axis(title='', format='%b %Y'), scale=alt.Scale(domain=brush), impute=alt.ImputeParams(value=None)),
alt.Y('yoy:Q', axis=alt.Axis(title='Year over Year Revenue Growth [%]')),
color=alt.condition("datum['yoy'] < 0",
alt.value('lightsalmon'),
alt.value(color)
),
tooltip=[alt.Tooltip('dt:T', format='%B %Y', title='Period'), alt.Tooltip('yoy:Q', format=',.02f')]
).properties(
title=f'US: {metric}',
)
lower = base.properties(
height=100
).add_selection(brush)
return (upper & lower).properties(background='white') if background else (upper & lower)
c = doYoYChartFor('Furniture, home furn, electronics, and appliance stores', df_combined)
c.save('retailsales.png')
#c.display()
doComboChartFor('Furniture, home furn, electronics, and appliance stores', df_combined)
doComboChartFor('Retail sales, total', df_combined, 'Monthly Revenue [Million USD]')
doComboChartFor('Motor vehicle and parts dealers', df_combined)
toInclude = [
'Retail sales, total',
'Motor vehicle and parts dealers',
'Nonstore retailers',
'Furniture, home furn, electronics, and appliance stores'
]
tmp = df_combined[toInclude].copy()
tmp.columns = ['Retail sales, total', 'Retail sales excluding motor vehicles',
'Nonstore retailers', 'Furniture, home furn, electronics, and appliance stores']
#tmp['Retail sales, motor vehicles and parts'] = tmp['Motor vehicle and parts dealers']
tmp = tmp.join(df_cpi['CWUR0000SA0'], how='inner')
tmp.columns = ['Retail Total', 'Retail Motor Vehicles',
'Nonstore retailers', 'Furniture, home furn, electronics, and appliance stores', 'CPI']
tmp['Total-adj'] = tmp['Retail Total'] / tmp['CPI']
tmp['Auto-adj'] = tmp['Retail Motor Vehicles'] / tmp['CPI']
tmp['Nonstore retailers'] = tmp['Nonstore retailers'] / tmp['CPI']
tmp['Furniture, home furn, electronics, and appliance stores'] = tmp['Furniture, home furn, electronics, and appliance stores'] / tmp['CPI']
chartable = tmp.reset_index()[['index', 'Total-adj', 'Auto-adj']].melt(id_vars='index')
alt.Chart(chartable).mark_line().encode(
alt.X('index:T', axis=alt.Axis(title='')),
alt.Y('value:Q', axis=alt.Axis(title='Revenue [Million 1982-1984 USD]')),
alt.Color('variable:N'),
tooltip=[alt.Tooltip('index:T', title='Date'), alt.Tooltip(f'value:Q', title='Revenue [Million $]', format=',.0f')]
).properties(
title=['US: CPI-adjusted Retail Sales Revenue', '(excluding food services)'],
height=500,
width=700
)
df_adj = tmp['Total-adj'].reset_index().copy()
df_adj.columns = ['dt', 'Retail Sales Adj']
df_adj = df_adj.groupby('dt').agg('mean').reset_index()
doComboChartFor('Retail Sales Adj', df_adj.set_index('dt'))
doComboChartFor('Nonstore retailers', df_combined)
df_adj_e = tmp['Nonstore retailers'].reset_index().copy()
df_adj_e.columns = ['dt', 'Nonstore retailers']
doComboChartFor('Nonstore retailers', df_adj_e.set_index('dt'))
df_adj_f = tmp['Furniture, home furn, electronics, and appliance stores'].reset_index().copy()
df_adj_f.columns = ['dt', 'Home Furnishings']
doComboChartFor('Home Furnishings', df_adj_f.set_index('dt'))
charts = []
for s in df_melted.Description.unique():
if s in ['Retail sales, total',
'Retail sales excluding motor vehicles',
'Nonstore retailers',
'Furniture, home furn, electronics, and appliance stores',
'All other gen. merchandise stores',
'Automotive parts, acc., and tire stores',
'Building mat. and garden equip. and supplies dealers',
'Building mat. and supplies dealers',
'Clothing and clothing access. stores',
'Department stores(excl. discount department stores)',
'Discount dept. stores']:
continue
charts.append(doComboChartFor(s, df_combined, background=False)) #doYoYChartFor(s, df_combined) & doLineChartFor(s, df_combined))
alt.vconcat(*charts)