import pandas as pd
import altair as alt
import re
#%%time
books = {
'Shipments': 'https://www.census.gov/manufacturing/m3/prel/historical_data/histshts/naics/naicsvsp.xls',
'NewOrders': 'https://www.census.gov/manufacturing/m3/prel/historical_data/histshts/naics/naicsnop.xls',
'UnfilledOrders': 'https://www.census.gov/manufacturing/m3/prel/historical_data/histshts/naics/naicsuop.xls',
'TotalInventories': 'https://www.census.gov/manufacturing/m3/prel/historical_data/histshts/naics/naicsinvp.xls',
'InventoriesToShipments': 'https://www.census.gov/manufacturing/m3/prel/historical_data/histshts/naics/naicsisp.xls',
'UnfilledOrdersToShipments': 'https://www.census.gov/manufacturing/m3/prel/historical_data/histshts/naics/naicsusp.xls',
}
names = ['Code', 'Year', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
dfs = { b: pd.read_excel(u, header=None, names=names).set_index('Year') for b, u in books.items() }
def reshapeFor(df, code):
subset = df[df.Code == code].drop('Code', axis=1).reset_index().copy()
melted = subset.melt(id_vars=['Year'])
melted['dt'] = pd.to_datetime(melted['Year'].astype(str) + melted['variable'], format='%Y%b')
return melted
def chartFor(df, code, label):
melted = reshapeFor(df, code)
return alt.Chart(melted.sort_values('dt')).mark_line(color='indigo').encode(
alt.X('dt', axis=alt.Axis(title='')),
alt.Y('value:Q', axis=alt.Axis(title='Monthly Revenue [Million USD]')),
tooltip=[alt.Tooltip('value:Q'), alt.Tooltip('dt:T', format='%B - %Y')]
).properties(
title=label,
height=450,
width=750,
)
dfm = dfs['NewOrders']
#chartFor(dfm, 'UMXDNO', 'New US Manufacturing Orders (unadjusted, excluding defense)')
def reshapeYoyFor(df, code):
melted = reshapeFor(df, code)
yoy_orders = melted.set_index('dt')['value'].sort_index().dropna()\
.pct_change(12).apply(lambda v: v * 100.).reset_index()
return yoy_orders
def yoyChartFor(df, code='UMXDNO', label='New US Manufacturing Order Growth (unadjusted, excluding defense)'):
yoy_orders = reshapeYoyFor(df, code)
return alt.Chart(yoy_orders[-150:]).mark_bar(color='blue', size=2.5).encode(
alt.X('dt', axis=alt.Axis(title='')),
alt.Y('value:Q', axis=alt.Axis(title='Year over Year Revenue Growth [%]')),
color=alt.condition(f"datum['value'] < 0",
alt.value('tomato'),
alt.value('blue')
),
tooltip=[alt.Tooltip('dt:T', format='%B - %Y', title='Period'),
alt.Tooltip('value:Q', title='% Change', format=',.02f')]
).properties(
title=label,
height=450,
width=750,
background="white"
)
def doComboChart(df, code='UMXDNO', title="New US Manufacturing Order Growth (unadjusted, excluding defense)", label='Monthly Revenue [Million USD]', period=12, size=1.5, color='blue'):
df = reshapeFor(df, code)
brush = alt.selection(type='interval', encodings=['x'])
base = alt.Chart(df).mark_line(color='purple').encode(
alt.X('dt:T', axis=alt.Axis(title='', format='%b %Y')),
alt.Y(f'value:Q', axis=alt.Axis(title=label)),
tooltip=[
alt.Tooltip("dt:T", format="%b %Y"),
alt.Tooltip(f"value:Q")
]
).properties(
width=750,
height=450
)
upper = base.mark_bar(size=size, color=color).transform_window(
window=[alt.WindowFieldDef("lag", 'value', param=period, **{ 'as': 'previous' })],
sort=[{"field": "dt"}],
).transform_calculate(
yoy=f"((datum['value'] - datum.previous) / datum.previous) * 100"
).transform_filter(
(alt.datum.yoy < 400) & (alt.datum.yoy > -90)
).encode(
alt.X('dt:T', axis=alt.Axis(title='', format='%b %Y'), scale=alt.Scale(domain=brush), impute=alt.ImputeParams(value=None)),
alt.Y('yoy:Q', axis=alt.Axis(title='Year-over-year Growth [%]'), impute=alt.ImputeParams(method='value', keyvals=[100, 1000])),
color=alt.condition(f"datum['yoy'] < 0",
alt.value('lightsalmon'),
alt.value(color)
),
tooltip=[
alt.Tooltip('dt:T', format='%b %Y', title=''),
alt.Tooltip(f'yoy:Q', format=',.0f', title='Year-over-year Growth [%]')
]
).properties(
title=f"{title}"
)
lower = base.properties(
height=100
).add_selection(brush)
return (upper & lower).properties(
background='white'
)
c = doComboChart(dfm)
c.save('manufacturing.png')
c.display()
doComboChart(dfs['TotalInventories'], 'UMXDTI', 'US Manufacturing Inventories (unadjusted, excluding defense)')
doComboChart(dfs['Shipments'], 'UMXDVS', 'US Manufacturing Shipments (unadjusted, excluding defense)')
doComboChart(dfs['UnfilledOrders'], 'UMXDUO', 'US Manufacturing Unfilled Orders (unadjusted, excluding defense)')
doComboChart(dfs['InventoriesToShipments'], 'UMTMIS', 'US Manufacturing Inventories To Shipments Ratio (unadjusted, excluding defense)', label='Ratio')
doComboChart(dfs['UnfilledOrdersToShipments'], 'UMTMUS', 'US Manufacturing Unfilled Orders to Shipments Ratio (unadjusted)', label='Ratio')
doComboChart(dfs['UnfilledOrdersToShipments'], 'UMTMUS', 'US Manufacturing Unfilled Orders to Shipments Ratio Growth (unadjusted)')
alt.data_transformers.disable_max_rows()
def doRidgeLineFor(df, x, y, row, metric='New Orders', step=100, overlap=2):
# Convert snakeCase to Snake Case
label = re.sub("([a-z])([A-Z])", "\g<1> \g<2>", metric)
return alt.Chart(df, height=step).transform_joinaggregate(
mean_val=f'mean({x})', groupby=[row]
).mark_area(
interpolate='monotone',
fillOpacity=0.8,
stroke='lightgray',
strokeWidth=0.5
).encode(
alt.X(f'{x}:T', title=''),
alt.Y(
f'{y}:Q',
scale=alt.Scale(range=[step, -step * overlap]),
axis=None
),
alt.Fill(
f'{row}:N',
title='Category',
#legend=None,
scale=alt.Scale(scheme='category20')
),
tooltip=[alt.Tooltip(f'{x}:T'), alt.Tooltip(f'{row}:N'), alt.Tooltip(f'{y}:Q', format=',.02f')]
).facet(
row=alt.Row(
f'{row}:N',
title=None,
header=alt.Header(labelColor='white') #, labelAnchor='end')
)
).properties(
title=f"{label} Growth By Category",
bounds='flush'
).configure_facet(
spacing=0
).configure_view(
stroke=None,
width=650
).configure_title(
anchor='middle'
)
def doCategory(allDfs, categories, metric='NewOrders', excludes=[]):
df = allDfs[metric]
combined = pd.DataFrame()
for c, l in categories[metric]:
if c in excludes:
continue
#yoyChartFor(dfm, c, l).display()
tmp = reshapeYoyFor(df, c)
tmp['variable'] = tmp.index.map(lambda v: l)
combined = pd.concat([combined, tmp])
return doRidgeLineFor(combined[['dt', 'variable', 'value']].dropna(), 'dt', 'value', 'variable', metric)
seriesCodes = {
'MTM': 'Total Manufacturing',
'MXT': 'Manufacturing Excluding Transportation',
'MXD': 'Manufacturing Excluding Defense',
'MTU': 'Manufacturing with Unfilled Orders',
'MDM': 'Durable Goods',
'CMS': 'Construction Materials and Supplies',
'ITI': 'Information Technology Industries',
'CRP': 'Computers and Related Products',
'MVP': 'Motor Vehicles and Parts',
'TCG': 'Capital Goods',
'NDE': 'Nondefense Capital Goods',
'NXA': 'Nondefense Capital Goods Excluding Aircraft',
'DEF': 'Defense Capital Goods',
'COG': 'Consumer Goods',
'CDG': 'Consumer Durable Goods',
'CNG': 'Consumer Nondurable Goods',
'DXT': 'Durable Goods Excluding Transportation',
'DXD': 'Durable Goods Excluding Defense',
'34D': 'Communications equipment manufacturing, nondefense',
'34E': 'Communications equipment manufacturing, defense',
'ODG': 'Other Durable Goods',
}
metricCodes = {
'VS': 'Shipments',
'NO': 'NewOrders',
'UO': 'UnfilledOrders',
'TI': 'TotalInventories',
'IS': 'InventoriesToShipments',
'US': 'UnfilledOrdersToShipments',
}
categories = { k: [] for k in metricCodes.values() }
for k, v in metricCodes.items():
#print(k, v)
#categories[k]['label'] = v
#categories[v] = []
for k2, v2 in seriesCodes.items():
categories[v].append((f"U{k2}{k}", v2))
doCategory(dfs, categories, 'TotalInventories', excludes=['U34ETI', 'UDEFTI'])
doCategory(dfs, categories, 'NewOrders', excludes=['U34ENO', 'UDEFNO'])
doCategory(dfs, categories, 'Shipments', excludes=['U34ETI', 'UDEFTI'])
doCategory(dfs, categories, 'UnfilledOrders', excludes=['U34ETI', 'UDEFTI'])
doCategory(dfs, categories, 'InventoriesToShipments', excludes=['U34ETI', 'UDEFTI'])
doCategory(dfs, categories, 'UnfilledOrdersToShipments', excludes=['U34ETI', 'UDEFTI'])