A look at residental real estate, according to the Federal Reserve, the Federal Housing Authority, and Redfin.
import pandas as pd
import altair as alt
import numpy as np
import re
from os import environ
from datetime import datetime
try:
# for local execution
apiKeyFromFile = open("/Users/kyledunn/fredApiKey.txt", "r").read().strip()
except FileNotFoundError:
apiKeyFromFile = None
pass
# for CI
apiKey = environ.get("FRED_API_KEY", apiKeyFromFile)
if environ.get("GITHUB_WORKFLOW"):
raise RuntimeError("Requires manual updates")
from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen
def getSeries(series="", apiKey=apiKey, description=None):
fetchCommand = "https://api.stlouisfed.org/fred/series/observations?series_id={s}&realtime_end=9999-12-31&api_key={k}&file_type=txt"
resp = urlopen(fetchCommand.format(s=series, k=apiKey))
zipfile = ZipFile(BytesIO(resp.read()))
filesInZip = zipfile.namelist()
data = zipfile.open(filesInZip[1])
if description is None:
description = series
df = pd.read_csv(data, sep="\t", header=None, skiprows=1,
names=["date", description, "rt_start", "rt_end"], na_values=".")
df['date'] = pd.to_datetime(df.date)
return df.set_index("date")
df = getSeries("USSTHPI")
df2 = df.resample('1Q').mean()
subset = df2[df2.index <= '2000-12-31']
y = subset.USSTHPI.values
x = [n for n, v in enumerate(subset.index.values)]
z = np.polyfit(x, y, 1)
# 1 * x^0 + b
trend = np.add(np.multiply(z[1], 1), np.multiply(range(df2.shape[0]), z[0]))
(
alt.Chart(pd.DataFrame(index=df2.index, data=trend, columns=['fit']).reset_index()).mark_line(color='blue').encode(
alt.X('date:T', axis=alt.Axis(title='')),
alt.Y('fit:Q', axis=alt.Axis(title=''))
) +\
alt.Chart(df2.reset_index()).mark_line(color='black').encode(
alt.X('date:T', axis=alt.Axis(title='')),
alt.Y('USSTHPI:Q', axis=alt.Axis(title='Price Index [1980-Q1 = 100]'))
)
).properties(
title='US FHA House Price Index',
width=750,
height=400,
background='white'
)
dfcs = getSeries("CSUSHPINSA").resample('1Q').mean()
#dfcs.head()
subset2 = dfcs[dfcs.index <= '2000-12-31']
y2 = subset2.CSUSHPINSA.values
x2 = [n for n, v in enumerate(subset2.index.values)]
z2 = np.polyfit(x2, y2, 1)
trend2 = np.add(np.multiply(z2[1], 1), np.multiply(range(dfcs.shape[0]), z2[0]))
(
alt.Chart(pd.DataFrame(index=dfcs.index, data=trend2, columns=['fit']).reset_index()).mark_line(color='blue').encode(
alt.X('date:T', axis=alt.Axis(title='')),
alt.Y('fit:Q', axis=alt.Axis(title=''))
) +\
alt.Chart(dfcs.reset_index()).mark_line(color='black').encode(
alt.X('date:T', axis=alt.Axis(title='')),
alt.Y('CSUSHPINSA:Q', axis=alt.Axis(title='Price Index [2000-Jan = 100]'))
)
).properties(
title='S&P/Case-Shiller U.S. National Home Price Index',
width=750,
height=400,
background='white'
)
now = datetime.now().strftime("%d-%m-%Y")
currentYear = int(now.split("-")[-1])
extrapolatedDates = pd.date_range(start=now, end='1/1/2040', freq='Q')
y40 = np.add(np.multiply(z2[1] + 40, 1), np.multiply(range(dfcs.shape[0] + 20), z2[0]))
(
alt.Chart(pd.DataFrame(index=extrapolatedDates, data=y40[-extrapolatedDates.shape[0]:], columns=['fit2']).reset_index()).mark_line(color='blue').encode(
alt.X('index:T', axis=alt.Axis(title='')),
alt.Y('fit2:Q', axis=alt.Axis(title=''))
) +
alt.Chart(pd.DataFrame(index=dfcs.index, data=trend2, columns=['fit']).reset_index()).mark_line(color='blue').encode(
alt.X('date:T', axis=alt.Axis(title='')),
alt.Y('fit:Q', axis=alt.Axis(title=''))
) +
alt.Chart(dfcs.reset_index()).mark_line(color='black').encode(
alt.X('date:T', axis=alt.Axis(title='')),
alt.Y('CSUSHPINSA:Q', axis=alt.Axis(title='Price Index [2000-Jan = 100]'))
) +\
alt.Chart(pd.DataFrame(index=pd.to_datetime(range(currentYear+1, 2040), format="%Y"), data=dfcs.max()['CSUSHPINSA'], columns=['p']).reset_index()).mark_line(color='black', strokeDash=[5]).encode(
alt.X('index:T', axis=alt.Axis(title='')),
alt.Y('p:Q', axis=alt.Axis(title=''))
)
).properties(
title='S&P/Case-Shiller U.S. National Home Price Index (stagnation projection)',
width=750,
height=400,
background='white'
)
extrapolatedDates = pd.date_range(start=now, end='1/1/2035', freq='Q')
base = dfcs.max()['CSUSHPINSA']
decline = [base*(v + 1)**-.05 for v in range(extrapolatedDates.shape[0])]
#print(decline)
y40 = np.add(np.multiply(z2[1] + 25, 1), np.multiply(range(dfcs.shape[0] + 20), z2[0]))
(
alt.Chart(pd.DataFrame(index=extrapolatedDates, data=y40[-extrapolatedDates.shape[0]:], columns=['fit2']).reset_index()).mark_line(color='blue').encode(
alt.X('index:T', axis=alt.Axis(title='')),
alt.Y('fit2:Q', axis=alt.Axis(title=''))
) +
alt.Chart(pd.DataFrame(index=dfcs.index, data=trend2, columns=['fit']).reset_index()).mark_line(color='blue').encode(
alt.X('date:T', axis=alt.Axis(title='')),
alt.Y('fit:Q', axis=alt.Axis(title=''))
) +
alt.Chart(dfcs.reset_index()).mark_line(color='black').encode(
alt.X('date:T', axis=alt.Axis(title='')),
alt.Y('CSUSHPINSA:Q', axis=alt.Axis(title='Price Index [2000-Jan = 100]'))
) +\
alt.Chart(pd.DataFrame(index=extrapolatedDates[:-8], data=decline[:-8], columns=['p']).reset_index()).mark_line(color='black', strokeDash=[5]).encode(
alt.X('index:T'),
alt.Y('p:Q', axis=alt.Axis(title=''))
)
).properties(
title='S&P/Case-Shiller U.S. National Home Price Index (correction projection)',
width=750,
height=400,
background='white'
)
extrapolatedDates = pd.date_range(start=now, end='1/1/2030', freq='Q')
base = dfcs.max()['CSUSHPINSA']
decline = [base+(base * .011 * v) for v in range(extrapolatedDates.shape[0])]
#print(decline)
y40 = np.add(np.multiply(z2[1] + 11, 1), np.multiply(range(dfcs.shape[0] + 20), z2[0]))
(
alt.Chart(pd.DataFrame(index=extrapolatedDates, data=y40[-extrapolatedDates.shape[0]:], columns=['fit2']).reset_index()).mark_line(color='blue').encode(
alt.X('index:T', axis=alt.Axis(title='')),
alt.Y('fit2:Q', axis=alt.Axis(title=''))
) +
alt.Chart(pd.DataFrame(index=dfcs.index, data=trend2, columns=['fit']).reset_index()).mark_line(color='blue').encode(
alt.X('date:T', axis=alt.Axis(title='')),
alt.Y('fit:Q', axis=alt.Axis(title=''))
) +
alt.Chart(dfcs.reset_index()).mark_line(color='black').encode(
alt.X('date:T', axis=alt.Axis(title='')),
alt.Y('CSUSHPINSA:Q', axis=alt.Axis(title='Price Index [2000-Jan = 100]'))
) +\
alt.Chart(pd.DataFrame(index=extrapolatedDates[:-8], data=decline[:-8], columns=['p']).reset_index()).mark_line(color='black', strokeDash=[5]).encode(
alt.X('index:T', axis=alt.Axis(title='')),
alt.Y('p:Q', axis=alt.Axis(title=''))
)
).properties(
title='S&P/Case-Shiller U.S. National Home Price Index (continued growth projection)',
width=750,
height=400,
background='white'
)
# original source: "http://www.econ.yale.edu/~shiller/data/Fig2-1.xls"
df100yr = pd.read_excel("http://www.econ.yale.edu/~shiller/data/Fig2-1.xls", skiprows=3)
names = [" ".join(map(lambda v: str(v).replace("nan", ""), df100yr[:3][c])).strip() for c in df100yr.columns]
unames = [nm + str(n) if (n != 0 and nm == "Date") else nm for n, nm in enumerate(names)]
df100yr.columns = unames
df100yr = df100yr.iloc[3:]
df100yr2 = df100yr[["Date", "Home Price Index"]].copy()
df100yr2["Date"] = pd.to_datetime(df100yr2["Date"].apply(int).apply(str), format="%Y")
df100yr2["Home Price Index"] = df100yr2["Home Price Index"].apply(float)
df100yr_agg = df100yr2.groupby("Date").agg(np.mean).reset_index()
# start from 2015
# date CSUSHPINSA
df100yr_agg_rest = dfcs.resample('1Y').last().reset_index().tail(6)
df100yr_agg_rest.columns = ["Date", "Home Price Index"]
df100_yr_ready = pd.concat([df100yr_agg, df100yr_agg_rest])
df_recessions = getSeries("JHDUSRGDPBR", description="value")
c = (
alt.Chart(df_recessions.reset_index()[["date", "value"]]).mark_bar(color="#D3D3D3").encode(
alt.X("date:T", axis=alt.Axis(title='')),
alt.Y("value", axis=alt.Axis(title='', orient="right", labels=False))
) +\
alt.Chart(df100_yr_ready).mark_line(color='darkred').encode(
alt.X("Date:T", axis=alt.Axis(title='')),
alt.Y("Home Price Index", axis=alt.Axis(orient="left"))
)
).properties(
title="Robert Shiller \"Irrational Exuberance\" Housing Data (inflation adjusted)",
width=750,
height=400,
background='white'
).resolve_scale(y='independent')
c.save('residential-realestate.png')
c.display()
df_pop60 = getSeries("POPTOTUSA647NWDB")
df_pop70 = df_pop60[df_pop60.index > "1969-01-01"]
df_pop70.columns = ["U.S. Population Millions"] + df_pop70.columns.to_list()[1:]
df_pop70 = df_pop70.reset_index()
df_pop70.columns = ["Date"] + df_pop70.columns.to_list()[1:]
df_pop70["U.S. Population Millions"] = df_pop70["U.S. Population Millions"] / 1e6
df_pop70 = df_pop70.iloc[:, :2]
#df_pop70.head()
df100yr_pop = df_pop70.groupby("Date").agg(np.mean)
df100yr_pop = df100yr_pop.resample("1Y").mean().reset_index()
#df100yr_pop["Date"] = pd.to_datetime(df100yr_pop["Date"])
df100yr_hpi = df100_yr_ready.set_index("Date").resample("1Y").mean().reset_index()
df100yr_pop = df100yr_pop.merge(df100yr_hpi[["Date", "Home Price Index"]], left_on="Date", right_on="Date")
basePop = df100yr_pop["U.S. Population Millions"].values[0]
df100yr2_agg = df100yr_pop.copy()
df100yr2_agg["Demand-Adj"] = df100yr_pop["Home Price Index"] / (df100yr_pop["U.S. Population Millions"] / basePop)
skipbad = (df100yr2_agg["Date"] < pd.to_datetime("1960-12-31")) |\
(df100yr2_agg["Date"] > pd.to_datetime("1970-12-31"))
coverbad = (df100yr2_agg["Date"].isin([pd.to_datetime("1959-12-31"), pd.to_datetime("1971-12-31")]))
(
alt.Chart(df100yr2_agg[skipbad]).mark_line().encode(
alt.X("Date", axis=alt.Axis(title='')),
alt.Y("Demand-Adj")
) +\
alt.Chart(df100yr2_agg[coverbad]).mark_line(color="white", strokeDash=[5]).encode(
alt.X("Date", axis=alt.Axis(title='')),
alt.Y("Demand-Adj")
)
).properties(
title="U.S. Historical Housing Price Index (inflation, population adjusted)",
width=750,
height=400,
background='white'
)
df_new = pd.read_excel("https://www.census.gov/construction/nrs/xls/sold_cust.xls", skiprows=9, header=None).iloc[:-4]
#df_new.tail(20)
df_new_latest = pd.read_excel("https://www.census.gov/construction/nrs/xls/newressales.xls", skiprows=8).iloc[:14, :-1]
df_new_latest.columns = ['Month', 'US-Sold', 'NE-Sold', 'MW-Sold', 'S-Sold', 'W-Sold',
'US-Available', 'NE-Available', 'MW-Available', 'S-Available', 'W-Available',
'Months-Supply', 'MedianPrice', 'AveragePrice']
# Drop erroneous label rows
df_new_latest = df_new_latest.dropna(how='all')
df_new_latest = df_new_latest[df_new_latest.Month != 2019]
df_new_latest['Month'] = pd.concat([df_new_latest['Month'].iloc[:3].map(lambda v: re.sub('[^A-z]+', '', str(v).split(' ')[0]) + "-2018"),\
df_new_latest['Month'].iloc[3:].map(lambda v: re.sub('[^A-z]+', '', str(v).split(' ')[0]) + "-2019")])
#df_new_latest.head(20)
df_new_latest['dt'] = pd.to_datetime(df_new_latest['Month'], format='%B-%Y', errors='coerce')
df_new_latest = df_new_latest.dropna()
# https://www.census.gov/housing/hvs/data/histtabs.html
df_o = pd.read_excel("http://www.census.gov/housing/hvs/data/histtab19.xlsx", skiprows=3, engine='openpyxl').iloc[3:158]
df_o = df_o.dropna(how='all')
#df_o.head()
vals = []
for i in range(0, df_o.shape[0], 5):
year = 1994 + int(i / 5)
vals = vals + [None, "{:d}-Q1".format(year), "{:d}-Q2".format(year), "{:d}-Q3".format(year), "{:d}-Q4".format(year)]
#print(vals)
df_o['dt'] = list(map(lambda v: pd.to_datetime(v), vals))
alt.data_transformers.disable_max_rows()
def doRidgeLineFor(df, x, y, row, metric='Home Ownershup', step=100, overlap=2, order=[]):
# Convert snakeCase to Snake Case
label = re.sub("([a-z])([A-Z])", "\g<1> \g<2>", metric)
return alt.Chart(df, height=step).transform_joinaggregate(
mean_val=f'mean({x})', groupby=[row]
).mark_area(
interpolate='monotone',
fillOpacity=0.8,
stroke='lightgray',
strokeWidth=0.5
).encode(
alt.X(f'{x}:T', title=''),
alt.Y(
f'{y}:Q',
scale=alt.Scale(range=[step, -step * overlap]),
axis=None
),
alt.Fill(
f'{row}:N',
title='Category',
#legend=None,
scale=alt.Scale(scheme='category20')
),
tooltip=[alt.Tooltip(f'{x}:T'), alt.Tooltip(f'{row}:N'), alt.Tooltip(f'{y}:Q', format=',.02f')]
).facet(
row=alt.Row(
f'{row}:N',
title=None,
header=alt.Header(labelColor='white'), #, labelAnchor='end')
sort=order
)
).properties(
title=f"Home Ownership Growth By Age Group",
bounds='flush'
).configure_facet(
spacing=0
).configure_view(
stroke=None,
width=650
).configure_title(
anchor='middle'
)
sortOrder = ['Under 35 years', '35 to 44 years', '45 to 54 years',
'55 to 64 years', '65 years and over', 'U.S.']
validColumns = [c for c in df_o.columns if "Unnamed" not in c]
df_orate = df_o[validColumns].dropna(how='all').iloc[:, 1:].melt(id_vars='dt')
alt.Chart(df_orate).mark_line().encode(
alt.X('dt:T', axis=alt.Axis(title='')),
alt.Y('value:Q', axis=alt.Axis(title='Ownership Rate [%]')),
alt.Color('variable:N', title='Group', sort=sortOrder)
).properties(
title='Home Ownership Rate By Age Group [Since 1994]',
width=700,
height=450
)
yoy = df_o[validColumns].dropna(how='all').iloc[:, 1:].set_index('dt')
yoy = yoy.pct_change(4).apply(lambda v: v*100.0)
doRidgeLineFor(yoy.reset_index().melt(id_vars='dt'), 'dt', 'value', 'variable', order=sortOrder)
alt.Chart(df_orate[df_orate.dt > '01-01-2009']).mark_line().encode(
alt.X('dt:T', axis=alt.Axis(title='')),
alt.Y('value:Q', axis=alt.Axis(title='Ownership Rate [%]')),
alt.Color('variable:N', title='Group', sort=sortOrder)
).properties(
title='Home Ownership [Since 2009]',
width=700,
height=450
)
from pytrends.request import TrendReq
pytrends = TrendReq(hl='en-US', tz=360)
kw_listgone = ["moving company"]
pytrends.build_payload(kw_listgone, cat=0, timeframe='all', geo='', gprop='')
df_ginterestone = pytrends.interest_over_time().reset_index()
kw_listgtwo = ["mortgage preapproval"]
pytrends.build_payload(kw_listgtwo, cat=0, timeframe='all', geo='', gprop='')
df_ginteresttwo = pytrends.interest_over_time().reset_index()
#df['label'] = df.ts.map(lambda v: 'Price [USD]')
#df_interest['label'] = df_interest.date.map(lambda v: 'Interest [Google Trends]')
df_one = pd.concat([df_ginterestone, df_ginterestone[kw_listgone[0]].map(lambda v: kw_listgone[0]).rename('label')], axis=1)
df_two = pd.concat([df_ginteresttwo, df_ginteresttwo[kw_listgtwo[0]].map(lambda v: kw_listgtwo[0]).rename('label')], axis=1)
s1 = alt.Chart(df_one).mark_line(color='black').encode(
alt.X("date:T", axis=alt.Axis(title=None)),
alt.Y("{}:Q".format(kw_listgone[0]), axis=alt.Axis(title="")),
alt.Color('label', legend=alt.Legend(orient="top-right", title=""))
)
s2 = alt.Chart(df_two).mark_line(color='green').encode(
alt.X("date:T".format(kw_listgtwo[0]), axis=alt.Axis(title=None)),
alt.Y("{}:Q".format(kw_listgtwo[0]), axis=alt.Axis(title="")),
alt.Opacity('label', legend=alt.Legend(orient="top-right", title=""))
)
alt.layer(
s1,
s2
).resolve_scale(
y='independent'
).properties(
title="Google Trends Index for keywords: {}".format(", ".join(kw_listgone + kw_listgtwo)),
width=750,
height=400,
background='white'
)#.interactive()
kw_listg = ["mortgage preapproval"]
pytrends.build_payload(kw_listg, cat=0, timeframe='all', geo='', gprop='')
df_ginterest = pytrends.interest_over_time().reset_index()
#print(df_ginterest)
df_ginterest.columns = ['date'] + df_ginterest.columns[1:-1].to_list() + ['isPartial']
alt.Chart(df_ginterest[['date'] + kw_listg].melt(id_vars=['date'])).mark_line().encode(
alt.X("date:T", axis=alt.Axis(title=None)),
alt.Y("value:Q", axis=alt.Axis(title="Interest Index [Google Trends]")),
alt.Color("variable:N", title='Google Search'),
tooltip=[alt.Tooltip("date:T", format="%B %Y")]
).properties(
title="Google Trends Index for keywords: {}".format(", ".join(kw_listg)),
width=750,
height=400,
background='white'
)
df_us_inventory = pd.read_excel("https://www.census.gov/housing/hvs/data/hist_tab_7a_v2019.xlsx", skiprows=4)
df_us_inventory = df_us_inventory.dropna(how='all').iloc[:14]
df_us_inventory.columns = ["Type"] + [re.sub("[^0-9]", "", str(c)) for c in df_us_inventory.columns[1:]]
df_us_inventory['Type'] = df_us_inventory['Type'].map(lambda v: re.sub("[^A-z-]", "", v.replace(" ", "-")))
df_us_inventory = df_us_inventory[df_us_inventory['Type'] == 'All-housing-units'].T.apply(lambda v: v * 1000).iloc[1:]
df_us_inventory.index = df_us_inventory.index.map(lambda v: pd.to_datetime(v, format="%Y"))
#df_us_inventory
df_households = pd.read_excel("https://www.census.gov/housing/hvs/data/hist_tab_13a_v2019.xlsx", skiprows=4).dropna(how='all')
df_households.columns = ['Year'] + [c for c in df_households.columns[1:]]
df_households['Year'] = df_households['Year'].apply(lambda v: re.sub("[^0-9]", "", str(v)))
df_households = df_households.set_index("Year")
df_households = df_households.applymap(lambda v: v * 1000)
df_households_melted = df_households.reset_index().melt(id_vars='Year').dropna()
df_households_melted['dt'] = pd.to_datetime(df_households_melted['Year'] + "-" + df_households_melted['variable'], format="%Y-%B")
#df_households_melted.head(21)
df_households = df_households_melted[['dt', 'value']]
#df_households
housing_merged = df_households.set_index('dt').merge(df_us_inventory, left_index=True, right_index=True)
housing_merged.columns = ['Census-counted households', 'Existing housing units']
#housing_merged
housing_merged_melted = housing_merged.reset_index().melt(id_vars='index')
#housing_merged_melted
alt.Chart(housing_merged_melted).mark_line().encode(
alt.X('index:T', axis=alt.Axis(title='')),
alt.Y('value:Q', axis=alt.Axis(title='Total')),
alt.Color('variable:N', title='', legend=alt.Legend(orient='top', padding=8)),
tooltip=[alt.Tooltip('index:T', title='Year'), alt.Tooltip('variable:N', title='Measure'), alt.Tooltip('value:Q', format=",.0f")]
).properties(
title='Comparison of Total Households (per 2000 Census) and Total Housing Units',
width=750,
height=450
)
housing_merged_yoy = housing_merged.pct_change(1).apply(lambda v: v * 100).dropna()
housing_merged_yoy.columns = ['Household-growth', 'Housing-unit-growth']
housing_merged_yoy_melted = housing_merged_yoy.reset_index().melt(id_vars='index')
alt.Chart(housing_merged_yoy_melted).mark_line().encode(
alt.X('index:T', axis=alt.Axis(title='')),
alt.Y('value:Q', axis=alt.Axis(title='Year-over-year Change [%]')),
alt.Color('variable:N', title='')
).properties(
title='Comparison of housing demand growth with new supply',
width=750,
height=450
)
dfs = pd.read_excel("https://www.census.gov/construction/nrs/xls/sold_cust.xls", sheet_name='Reg Sold',
skiprows=7, usecols=[0, 1, 2, 3, 4, 5])
dfs.columns = ['Date', 'United States', 'North-east', 'Mid-West', 'South', 'West']
dfs = dfs.replace('(NA)', np.nan)
dfs = dfs.dropna()
dfs['dt'] = pd.to_datetime(dfs['Date'])
dfs = dfs.drop('Date', axis=1)
valc = alt.Chart(dfs).mark_line(color='black').encode(
alt.X('dt:T'),
alt.Y('United States:Q')
).properties(
title='US New Home Sales',
width=750,
height=450
)
dfs_yoy = dfs.set_index('dt').pct_change(12).reset_index()
yoyc = alt.Chart(dfs_yoy).mark_bar(width=2).encode(
alt.X('dt:T', axis=alt.Axis(title='')),
alt.Y('United States:Q', axis=alt.Axis(title='Year-over-year Change [%]', format='%')),
color=alt.condition("datum['United States'] > 0",
alt.value('steelblue'),
alt.value('darkred')
),
tooltip=[alt.Tooltip('dt:T', title='Date'), alt.Tooltip('United States:Q', title='% Change', format='.0%')]
).properties(
title='US New Home Sales',
width=750,
height=450
)
valc & yoyc
# Housing Starts
dfs_s = pd.read_excel("https://www.census.gov/construction/nrc/xls/starts_cust.xls",
sheet_name='StartsUA ', skiprows=6)
s_cols = ['Date',
'AllUnits', '1Unit', '2to4Units', '5PlusUnits',
'NE-AllUnits', 'NE1Unit',
'MW-AllUnits', 'MW1Unit',
'S-AllUnits', 'S1Unit',
'W-AllUnits', 'W1Unit']
dfs_s.columns = s_cols
dfs_s = dfs_s.replace('(NA)', np.nan)
#dfs_s = dfs_s.dropna()
dfs_s['dt'] = pd.to_datetime(dfs_s['Date'], errors='coerce')
dfs_s = dfs_s[~pd.isna(dfs_s['dt'])]
dfs_s = dfs_s.drop('Date', axis=1)
#dfs_s.keys()
alt.Chart(dfs_s).mark_line().encode(
alt.X('dt:T', axis=alt.Axis(title='')),
alt.Y('AllUnits:Q'),
tooltip=[alt.Tooltip('dt:T', title='Date'), alt.Tooltip('AllUnits:Q', title='Units started')]
).properties(
title='US New Residential Unit Starts',
width=750,
height=450
)
dfs_s_yoy = dfs_s.set_index('dt').pct_change(12).reset_index()
alt.Chart(dfs_s_yoy[-12*20:]).mark_bar(size=2).encode(
alt.X('dt:T', axis=alt.Axis(title='')),
alt.Y('AllUnits:Q', axis=alt.Axis(title='Year-over-year Change [%]', format='%')),
color=alt.condition("datum['AllUnits'] > 0",
alt.value('steelblue'),
alt.value('darkred')
),
tooltip=[alt.Tooltip('dt:T', title='Date'), alt.Tooltip('AllUnits:Q', title='Growth', format='.1%')]
).properties(
title='US New Residential Unit Start Growth',
width=750,
height=450
)
# Completions
dfs_comp_raw = pd.read_excel('https://www.census.gov/construction/nrc/xls/co_cust.xls', sheet_name=None)
dfs_comp = dfs_comp_raw['CompsUA']
dfs_comp = dfs_comp.iloc[8:]
dfs_comp.columns = s_cols
dfs_comp = dfs_comp.replace('(NA)', np.nan)
dfs_comp['dt'] = pd.to_datetime(dfs_comp['Date'], errors='coerce')
#dfs_comp = dfs_comp[~pd.isna(dfs_comp['dt'])]
dfs_comp = dfs_comp.drop('Date', axis=1)
#dfs_comp.tail(20)
alt.Chart(dfs_comp).mark_line().encode(
alt.X('dt:T', axis=alt.Axis(title='')),
alt.Y('AllUnits:Q'),
tooltip=[alt.Tooltip('dt:T', title='Date'), alt.Tooltip('AllUnits:Q', title='Units completed')]
).properties(
title='US New Residential Unit Completions',
width=750,
height=450
)
from time import sleep
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
import selenium.webdriver.support.expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
def ajaxComplete(driver):
try:
return 0 == driver.execute_script("return jQuery.active")
except WebDriverException:
pass
chrome_options = Options()
chrome_options.add_argument("--disable-extensions")
chrome_options.add_argument("--disable-gpu")
#chrome_options.add_argument("--headless")
#chrome_options.add_argument("download.default_directory=/tmp")
chrome_options.add_experimental_option("prefs", { "download.default_directory" : "/tmp" })
# https://redfin-public-data.s3-us-west-2.amazonaws.com/redfin_covid19/weekly_housing_market_data_most_recent.tsv
baseUrl = "https://public.tableau.com/views/RedfinDataCentertrend/{0}?%3AshowVizHome=no"
metricFilenameLUT = {
"HomeSales": { "filename": "sales", "label": "Average Homes Sold" },
"MedianSalePrice": { "filename": "med_sale_price", "label": "Median Sale Price" },
"Inventory": { "filename": "inv", "label": "Number of Homes"},
"PriceDrops": { "filename": "pd", "label": "Percent with Drops"},
"PricePerSquareFoot": { "filename": "ppsf", "label": "Price per sqft"},
}
topMetros = [
"Dallas, TX metro area",
"Houston, TX metro area",
"Miami, FL metro area",
"Atlanta, GA metro area",
"Phoeniz, AZ metro area",
"San Francisco, CA metro area",
"Detroit, MI metro area",
"Minneapolis, MN metro area",
"San Diego, CA metro area",
"Tampa, FL metro area",
"Denver, CO metro area",
"St. Louis, MO metro area",
"Baltimore, MD metro area",
"Charlotte, NC metro area",
"Orlando, FL metro area",
"San Antonio, TX metro area",
"Portland, OR metro area",
"Pittsburgh, PA metro area",
"Las Vegas, NV metro area",
"Austin, TX metro area",
"Cincinnati, OH metro area",
"Kansas City, MO metro area",
"Columbus, OH metro area",
"Indianapolis, IN metro area",
"Cleveland, OH metro area",
"San Jose, CA metro area",
"Nashville, TN metro area",
"Virginia Beach, NC metro area",
"Providence, RI metro area",
"Milwaukee, WI metro area",
"Jacksonville, FL metro area",
"Oklahoma City, OK metro area",
"Raleigh, NC metro area",
"Memphis, TN metro area",
"Richmod, VA metro area",
"New Orleans, LA metro area",
"Louisville, KY metro area",
"Salt Lake City, UT metro area",
"Hartford, CT metro area",
"Buffalo, NY metro area",
"Birmingham, AL metro area",
]
def addSelection(wait, region="Denver, CO metro area"):
regionSearchButton = "div.tabComboBoxButtonHolder > span"
# Click the search filter button
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, regionSearchButton))).click()
regionSearchTextbox = "div[id^='tableau_base_widget_LegacyCategoricalQuickFilter'][id$='menu'] textarea"
# Fill out the search text box
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, regionSearchTextbox))).send_keys(region)
sleep(6)
# Tab to the region's checkbox and select it
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, regionSearchTextbox))).send_keys(Keys.TAB + " ")
sleep(1)
applyButton = "div[id^='tableau_base_widget_LegacyCategoricalQuickFilter'][id$='menu'] > div.CFApplyButtonContainer > button.tab-button.tab-widget.apply"
# Click the apply button
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, applyButton))).click()
#sleep(5)
# Wait for loading to stop
#wait.until(EC.invisibility_of_element_located((By.CSS_SELECTOR, "div[class*='clear-glass']")))
# Escape the search filter to close it
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, regionSearchTextbox))).send_keys(Keys.ESCAPE)
def fetch(metric="HomeSales"):
driver = webdriver.Chrome(executable_path='/usr/local/bin/chromedriver',
chrome_options=chrome_options)
driver.get(baseUrl.format(metric))
wait = WebDriverWait(driver, 45)
wait.until(ajaxComplete)
#print("page loaded")
# TODO get to work
#addSelection(wait, "Denver, CO metro area")
wait.until(EC.staleness_of((By.CSS_SELECTOR, "div[id='loadingGlassPane']")))
downloadButton = ".tab-icon-download"
# Click the download button
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, downloadButton))).click()
#print("download button clicked")
# Click the crosstab format
crosstabButton = "#DownloadDialog-Dialog-Body-Id > div > fieldset > button:nth-child(4)"
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, crosstabButton))).click()
#print("crosstab button clicked")
# Choose CSV export format
csvFormat = "#export-crosstab-options-dialog-Dialog-BodyWrapper-Dialog-Body-Id > div > div:nth-child(2) > div:nth-child(2) > div > label:nth-child(2)"
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, csvFormat))).click()
#print("csv option clicked")
# Start the download
startDownload = "#export-crosstab-options-dialog-Dialog-BodyWrapper-Dialog-Body-Id > div > div:nth-child(3) > button"
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, startDownload))).click()
#print("start download clicked")
sleep(10)
driver.close()
try:
# Case for sales data
df = pd.read_csv("/tmp/{0}.csv".format(metricFilenameLUT[metric]['filename']), encoding='UTF-16', sep='\t')
#print(df.iloc[0, 1:])
df.iloc[:, 1:] = df.iloc[:, 1:].applymap(lambda v: int(v.replace(",", "")))
except ValueError:
# Case for median sale price data
df = pd.read_csv("/tmp/{0}.csv".format(metricFilenameLUT[metric]['filename']),
encoding='UTF-16', sep='\t', skiprows=1)
#print(df.iloc[0, 1:])
df.iloc[:, 1:] = df.iloc[:, 1:].applymap(lambda v: int(v.replace("K", "000").replace("$", "")))
return df
#df_tmp = fetch("HomeSales")
#fetch("HomeSales").head(10)
df_rf = pd.read_csv("https://redfin-public-data.s3-us-west-2.amazonaws.com/redfin_covid19/weekly_housing_market_data_most_recent.tsv",
sep="\t")
#df_rf.head()
metric = "HomeSales"
df_sales = fetch(metric)
df_sales_melted = df_sales.melt(id_vars='Region', var_name='Period', value_name=metricFilenameLUT[metric]['label'])
df_sales_melted = df_sales_melted[df_sales_melted['Region'] == ' National']
df_sales_melted['Year of Period End'] = df_sales_melted['Period'].apply(lambda v: v.split(" ")[-1])
df_n = pd.DataFrame()
for y in df_sales_melted['Year of Period End'].unique():
tmp = df_sales_melted[df_sales_melted['Year of Period End'] == y]['Average Homes Sold'].cumsum()
tmp.index = range(tmp.shape[0])
tmp.name = y
df_n = pd.concat([df_n, tmp], axis=1)
monthNames = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
intsToMonths = { i: n for i, n in zip(range(12), monthNames) }
df_n = df_n.rename(intsToMonths)
try:
df_remelted = df_n[['2021', '2020', '2019', '2018', '2017']].reset_index().melt(id_vars=['index']).sort_values(['variable', 'value']).dropna()
except KeyError:
df_remelted = df_n[['2020', '2019', '2018', '2017']].reset_index().melt(id_vars=['index']).sort_values(['variable', 'value']).dropna()
alt.Chart(df_remelted).mark_line().transform_calculate(
logged='log(datum.value)'
).encode(
alt.X('index:O', axis=alt.Axis(title='', labelAngle=0), sort=monthNames),
alt.Y('value:Q', axis=alt.Axis(title='Cumulative Sales'), scale=alt.Scale(zero=False)),
alt.Color('variable:N', title='Year'),
tooltip=[alt.Tooltip('index:O', title='Month'), alt.Tooltip('variable:N', title='Year'),
alt.Tooltip('value:Q', title='Units', format=',d')]
).properties(
title=['US Annual Home Sales', 'Source: Redfin'],
width=750,
height=450,
background='white'
)
df_melted['dt'] = df_melted['Period'].map(lambda v: pd.to_datetime(v, format="%B %Y"))
df_yoy = df_melted.set_index('dt')['Average Homes Sold'].pct_change(12).reset_index()
alt.Chart(df_yoy).mark_bar().encode(
alt.X('dt:T', axis=alt.Axis(title='', format='%b %Y')),
alt.Y('Average Homes Sold:Q', axis=alt.Axis(title='Year-over-Year Sales Growth [%]', format='%')),
color=alt.condition("datum['Average Homes Sold'] < 0",
alt.value('salmon'),
alt.value('grey')
),
tooltip=[
alt.Tooltip('dt:T', title='Date'),
alt.Tooltip('Average Homes Sold:Q', title='Change', format='.0%')
]
).properties(
title=['US Home Sales Growth', 'Source: Redfin'],
width=750,
height=450
)
metric = "MedianSalePrice"
df_sale_price = fetch(metric)
df_sale_price_melted = df_sale_price.melt(id_vars='Region', var_name='Period', value_name=metricFilenameLUT[metric]['label'])
df_sale_price_melted = df_sale_price_melted[df_sale_price_melted['Region'] == ' National']
df_sale_price_melted['Year of Period End'] = df_sale_price_melted['Period'].apply(lambda v: v.split(" ")[-1])
df_sale_price_melted['dt'] = df_sale_price_melted['Period'].apply(lambda v: pd.to_datetime(v, format='%B %Y'))
#df_sale_price_melted.head()
alt.Chart(df_sale_price_melted).mark_line().encode(
alt.X('Period:T', axis=alt.Axis(title='', format='%b %y')),
alt.Y('Median Sale Price:Q', axis=alt.Axis(title='Median Sale Price [USD]'))
).properties(
title=['US Home Median Sale Price', 'Source: Redfin'],
width=750,
height=450
)
df_sale_price_yoy = df_sale_price_melted.set_index('dt')['Median Sale Price'].pct_change(12).reset_index()
alt.Chart(df_sale_price_yoy).mark_bar().encode(
alt.X('dt:T', axis=alt.Axis(title='', format='%b %y')),
alt.Y('Median Sale Price:Q', axis=alt.Axis(title='Year over Year Change [%]', format='%')),
color=alt.condition("datum['Median Sale Price'] < 0",
alt.value('salmon'),
alt.value('grey')
),
tooltip=[
alt.Tooltip('dt:T', title='Date'),
alt.Tooltip('Median Sale Price:Q', title='Change', format='.02%')
]
).properties(
title=['US Home Median Price Growth', 'Source: Redfin'],
width=750,
height=450
)
metric = "Inventory"
df_inv = fetch(metric)
df_inv_melted = df_inv.melt(id_vars='Region', var_name='Period', value_name=metricFilenameLUT[metric]['label'])
df_inv_melted = df_inv_melted[df_inv_melted['Region'] == ' National']
df_inv_melted['Year of Period End'] = df_inv_melted['Period'].apply(lambda v: v.split(" ")[-1])
df_inv_melted['dt'] = df_inv_melted['Period'].apply(lambda v: pd.to_datetime(v, format='%B %Y'))
#df_inv_melted.head()
alt.Chart(df_inv_melted).mark_line().encode(
alt.X('Period:T', axis=alt.Axis(title='', format='%b %y')),
alt.Y('Number of Homes:Q', axis=alt.Axis(title='Homes for Sale'))
).properties(
title=['US Homes For Sale', 'Source: Redfin'],
width=750,
height=450
)
df_inv_yoy = df_inv_melted.set_index('dt')['Number of Homes'].pct_change(12).reset_index()
alt.Chart(df_inv_yoy).mark_bar().encode(
alt.X('dt:T', axis=alt.Axis(title='', format='%b %y')),
alt.Y('Number of Homes:Q', axis=alt.Axis(title='Year-over-Year Change [%]', format='%')),
color=alt.condition("datum['Number of Homes'] < 0",
alt.value('salmon'),
alt.value('grey')
),
tooltip=[
alt.Tooltip('dt:T', title='Date'),
alt.Tooltip('Number of Homes:Q', title='Change', format='.02%')
]
).properties(
title=['US Home Inventory Growth', 'Source: Redfin'],
width=750,
height=450
)
# TODO: add selection to export
# TODO: Animated map gif for metros
"""
df_rdc = pd.read_csv("https://econdata.s3-us-west-2.amazonaws.com/Reports/Hotness/RDC_Inventory_Hotness_Metrics_Metro_History.csv")
df_rdc = df_rdc.dropna()
#df_rdc.head()
import numpy as np
dmetro = df_rdc[df_rdc['cbsa_title'].str.contains(".*denver.*")]
dmetro_melted = dmetro[['month_date_yyyymm', 'hotness_score', 'supply_score', 'demand_score']].melt(id_vars=['month_date_yyyymm'])
dmetro_melted['dt'] = dmetro_melted['month_date_yyyymm'].apply(lambda v: pd.to_datetime(v, format='%Y%m'))
alt.Chart(dmetro_melted).mark_line().encode(
alt.X('dt:T', axis=alt.Axis(title='', format='%B %Y')),
alt.Y('value:Q', axis=alt.Axis(title='Score')),
alt.Color('variable:N', title='Category')
).properties(
title='Realtor.com Market Hotness: Denver-Aurora-Lakewood, CO',
width=750,
height=450,
background='white'
)
"""