import pandas as pd
import altair as alt
import re
# Release table scraped from the Census C30 release page (first HTML table on the page).
# https://www.census.gov/construction/c30/release.html
dfc_release = pd.read_html('https://www.census.gov/construction/c30/release.html')[0]

# Spending time series workbook. The first 3 sheet rows are skipped and only the
# first 311 parsed rows are kept.
# NOTE(review): presumably this trims title rows and trailing footnotes - confirm
# the row count still matches the current workbook.
dfc = pd.read_excel('https://www.census.gov/construction/c30/xls/tottime.xls', skiprows=3).iloc[:311]

# 'Date' labels have the form '%b-%y' and may carry a trailing 'r' or 'p' marker
# (presumably "revised" / "preliminary" - confirm against the Census notes).
# The marker is stripped before parsing.
dfc['dt'] = dfc['Date'].map(
    lambda d: pd.to_datetime(d[:-1] if d[-1] in ('r', 'p') else d, format='%b-%y')
)

# Reduce column names to letters only so they are safe to use as Altair field names.
# The range must be spelled A-Za-z: 'A-z' also spans the ASCII punctuation between
# 'Z' and 'a' ([ \ ] ^ _ `), which would then be kept in the names.
dfc.columns = [re.sub(r"[^A-Za-z]", "", c) for c in dfc.columns]
# Line chart of the raw monthly 'TotalConstruction' series over time.
alt.Chart(dfc[['dt', 'TotalConstruction']]).mark_line(color='indigo').encode(
    alt.X('dt', axis=alt.Axis(title='')),
    alt.Y('TotalConstruction:Q', axis=alt.Axis(title='Monthly Spending [Million USD]')),
    tooltip=[
        alt.Tooltip('dt:T', format='%B - %Y', title='Period'),
        # This field is the spending level plotted on the y axis, so the tooltip
        # is labelled with the same unit as the axis.
        alt.Tooltip('TotalConstruction:Q', title='Monthly Spending [Million USD]'),
    ],
).properties(
    title='New US Construction Spending (unadjusted)',
    height=450,
    width=750,
)
def _yoy_growth(frame, column):
    """Return a frame with 'dt' and the 12-row percentage change of ``column``.

    The series is indexed by 'dt', sorted chronologically and stripped of
    missing values before ``pct_change(12)`` is applied, then scaled to
    percent. The 12-row lag equals a year-over-year change only if the data
    has one row per month without gaps (assumed here - not checked).
    """
    series = frame.set_index('dt')[column].sort_index().dropna()
    return (series.pct_change(12) * 100.).reset_index()


def _yoy_bar_chart(yoy, column, title):
    """Build the bar chart of ``column`` in ``yoy`` against 'dt'.

    Bars with a negative value are coloured 'lightsalmon', all others
    'steelblue'. ``title`` is used as the chart title.
    """
    return alt.Chart(yoy[['dt', column]]).mark_bar(width=1.5).encode(
        alt.X('dt', axis=alt.Axis(title='')),
        alt.Y(f'{column}:Q', axis=alt.Axis(title='Year over year spending growth [%]')),
        color=alt.condition(
            f"datum['{column}'] < 0",
            alt.value('lightsalmon'),
            alt.value('steelblue'),
        ),
        tooltip=[
            alt.Tooltip('dt:T', format='%B - %Y', title='Period'),
            alt.Tooltip(f'{column}:Q', title='% Change', format=',.02f'),
        ],
    ).properties(
        title=title,
        height=450,
        width=750,
        background="white",
    )


# Total construction spending growth.
yoy_construction = _yoy_growth(dfc, 'TotalConstruction')
c = _yoy_bar_chart(
    yoy_construction,
    'TotalConstruction',
    'New US Construction Spending Growth (unadjusted)',
)
c.display()

# Residential construction spending growth.
yoy_construction_r = _yoy_growth(dfc, 'TotalResidential')
cr = _yoy_bar_chart(
    yoy_construction_r,
    'TotalResidential',
    'New US Residential Construction Spending Growth (unadjusted)',
)
cr.display()

# Commercial construction spending growth.
yoy_construction_c = _yoy_growth(dfc, 'TotalCommercial')
cc = _yoy_bar_chart(
    yoy_construction_c,
    'TotalCommercial',
    'New US Commercial Construction Spending Growth (unadjusted)',
)
cc.display()
# Default ridgeline geometry: pixel height of one facet row, and how many
# row-heights an area may extend above its own row.
step = 150
overlap = 5


def doRidgeLineFor(df, x, y, row, title='US Construction Spending Growth [Census Bureau]',
                   band_height=None, band_overlap=None):
    """Build a ridgeline (stacked, overlapping area) chart faceted by ``row``.

    Args:
        df: Data passed to ``alt.Chart``.
        x: Name of the field encoded on the x axis as temporal (``:T``).
        y: Name of the field encoded on the y axis as quantitative (``:Q``).
        row: Name of the nominal field that defines one ridge per distinct
            value; it also drives the fill colour.
        title: Title of the faceted chart.
        band_height: Height in pixels of each facet row. Defaults to the
            module-level ``step``.
        band_overlap: Multiple of ``band_height`` by which the y range extends
            above each row. Defaults to the module-level ``overlap``.

    Returns:
        The configured faceted Altair chart.
    """
    # Resolve defaults at call time so later changes to the module constants
    # are still picked up.
    if band_height is None:
        band_height = step
    if band_overlap is None:
        band_overlap = overlap

    ridge = alt.Chart(df, height=band_height).mark_area(
        interpolate='monotone',
        fillOpacity=0.8,
        stroke='lightgray',
        strokeWidth=0.5,
    ).encode(
        alt.X(f'{x}:T', title=''),
        alt.Y(
            f'{y}:Q',
            # The negative upper bound lets an area extend past the top of
            # its own row, which produces the overlapping ridges.
            scale=alt.Scale(range=[band_height, -band_height * band_overlap]),
            axis=None,
        ),
        alt.Fill(
            f'{row}:N',
            title='',
            scale=alt.Scale(scheme='dark2'),
        ),
        tooltip=[
            alt.Tooltip(f'{x}:T'),
            alt.Tooltip(f'{row}:N', title='Sector'),
            alt.Tooltip(f'{y}:Q', format=',.02f', title="YoY % Change"),
        ],
    )

    return ridge.facet(
        row=alt.Row(
            f'{row}:N',
            title=None,
            # White row labels - presumably to hide them on a white
            # background, since the fill legend already names each ridge.
            header=alt.Header(labelColor='white'),
        )
    ).properties(
        title=title,
        bounds='flush',
    ).configure_facet(
        spacing=5,
    ).configure_view(
        stroke=None,
        width=650,
    ).configure_title(
        anchor='middle',
    )
# Note: hover over (or click) the chart to see the value for that date.
# The melted frame has one row per (date, sector), so lift Altair's row limit.
alt.data_transformers.disable_max_rows()

# Every column except the raw 'Date' label, indexed by the parsed date.
onlySectors = dfc[[col for col in dfc.columns if col != "Date"]]
# Sort chronologically before differencing, as the per-series year-over-year
# computations in this file do; otherwise the 12-row lag depends on the
# spreadsheet's row order.
onlySectors = onlySectors.set_index("dt").sort_index()

# 12-row percentage change, in percent, for every sector column at once.
yoySectors = onlySectors.pct_change(12) * 100

# Long format: one row per (dt, variable) with the growth in 'value'.
melted = yoySectors.reset_index().melt(id_vars="dt")
# Keep only values above -99. This also drops NaN rows, since the comparison is False for NaN.
# NOTE(review): presumably meant to hide series that collapse to ~zero - confirm.
melted = melted[melted.value > -99]

doRidgeLineFor(melted[['dt', 'variable', 'value']].dropna(), 'dt', 'value', 'variable')