
Residential Real Estate


A look at residential real estate, according to the Federal Reserve, the Federal Housing Finance Agency (FHFA), and Redfin.

In [1]:
import pandas as pd
import altair as alt
import numpy as np
import re
from os import environ
from datetime import datetime

try:
    # for local execution
    apiKeyFromFile = open("/Users/kyledunn/fredApiKey.txt", "r").read().strip()
except FileNotFoundError:
    # fall back to the FRED_API_KEY environment variable below
    apiKeyFromFile = None
# for CI
apiKey = environ.get("FRED_API_KEY", apiKeyFromFile)

if environ.get("GITHUB_WORKFLOW"):
    raise RuntimeError("Requires manual updates")

from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen
    
def getSeries(series="", apiKey=apiKey, description=None):
    fetchCommand = "https://api.stlouisfed.org/fred/series/observations?series_id={s}&realtime_end=9999-12-31&api_key={k}&file_type=txt" 
    
    resp = urlopen(fetchCommand.format(s=series, k=apiKey))
    zipfile = ZipFile(BytesIO(resp.read()))
    
    filesInZip = zipfile.namelist()
    
    data = zipfile.open(filesInZip[1])
    
    if description is None:
        description = series
    
    df = pd.read_csv(data, sep="\t", header=None, skiprows=1,
                       names=["date", description, "rt_start", "rt_end"], na_values=".")
    
    df['date'] = pd.to_datetime(df.date)
    
    return df.set_index("date")

df = getSeries("USSTHPI")

df2 = df.resample('1Q').mean()
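
The helper above relies on FRED's zipped tab-delimited export and reads the second file in the archive. Roughly the same frame can be built from the JSON endpoint instead; a minimal sketch, assuming the standard observations response shape (getSeriesJson is illustrative and not used elsewhere in this notebook):

import json

def getSeriesJson(series, apiKey=apiKey, description=None):
    url = ("https://api.stlouisfed.org/fred/series/observations"
           "?series_id={s}&api_key={k}&file_type=json").format(s=series, k=apiKey)
    obs = json.load(urlopen(url))["observations"]
    df = pd.DataFrame(obs)[["date", "value"]]
    df["value"] = pd.to_numeric(df["value"], errors="coerce")  # FRED marks missing values with "."
    df["date"] = pd.to_datetime(df["date"])
    return df.rename(columns={"value": description or series}).set_index("date")
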
In [2]:
subset = df2[df2.index <= '2000-12-31']

y = subset.USSTHPI.values
x = [n for n, v in enumerate(subset.index.values)]

z = np.polyfit(x, y, 1)

# intercept (z[1]) + slope (z[0]) * x, evaluated over the full quarterly index
trend = np.add(np.multiply(z[1], 1), np.multiply(range(df2.shape[0]), z[0]))
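
For reference, the same line can be produced with np.polyval, which evaluates z[0]*x + z[1] for a degree-1 fit; a quick equivalence check (not used below):

trend_check = np.polyval(z, np.arange(df2.shape[0]))
assert np.allclose(trend, trend_check)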

How do current prices compare with the pre-low-interest-rate era (i.e., before 2001)?
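
One rough way to quantify the gap is the percent deviation of the latest quarterly value from the extrapolated pre-2001 trend; a minimal sketch using the variables defined above:

last_actual = df2.USSTHPI.dropna().iloc[-1]
last_trend = trend[-1]
print("Latest index vs. pre-2001 trend: {:+.1f}%".format(100 * (last_actual / last_trend - 1)))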

In [3]:
(
    alt.Chart(pd.DataFrame(index=df2.index, data=trend, columns=['fit']).reset_index()).mark_line(color='blue').encode(
        alt.X('date:T', axis=alt.Axis(title='')),
        alt.Y('fit:Q', axis=alt.Axis(title=''))
    ) +\
    alt.Chart(df2.reset_index()).mark_line(color='black').encode(
        alt.X('date:T', axis=alt.Axis(title='')),
        alt.Y('USSTHPI:Q', axis=alt.Axis(title='Price Index [1980-Q1 = 100]'))
    )
).properties(
    title='US FHFA House Price Index',
    width=750,
    height=400,
    background='white'
)
Out[3]:
In [4]:
dfcs = getSeries("CSUSHPINSA").resample('1Q').mean()

#dfcs.head()

subset2 = dfcs[dfcs.index <= '2000-12-31']

y2 = subset2.CSUSHPINSA.values
x2 = [n for n, v in enumerate(subset2.index.values)]

z2 = np.polyfit(x2, y2, 1)

trend2 = np.add(np.multiply(z2[1], 1), np.multiply(range(dfcs.shape[0]), z2[0]))
In [5]:
(
    alt.Chart(pd.DataFrame(index=dfcs.index, data=trend2, columns=['fit']).reset_index()).mark_line(color='blue').encode(
        alt.X('date:T', axis=alt.Axis(title='')),
        alt.Y('fit:Q', axis=alt.Axis(title=''))
    ) +\
    alt.Chart(dfcs.reset_index()).mark_line(color='black').encode(
        alt.X('date:T', axis=alt.Axis(title='')),
        alt.Y('CSUSHPINSA:Q', axis=alt.Axis(title='Price Index [2000-Jan = 100]'))
    )
).properties(
    title='S&P/Case-Shiller U.S. National Home Price Index',
    width=750,
    height=400,
    background='white'
)
Out[5]:

Some possible scenarios for where home prices could go from here...
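
The three cells below sketch stagnation, correction, and continued-growth paths against the extended pre-2001 trend. The correction path decays the peak by a power law, base * (v + 1) ** -0.05 for quarter v, which works out to roughly a 7% drop after one year and about 14% after five years; a quick illustration with a made-up base level:

base_example = 300.0  # illustrative index level, not the actual series maximum used below
for q in (4, 20):
    level = base_example * q ** -0.05
    print("after {:d} quarters: {:.0f} ({:+.1f}%)".format(q, level, 100 * (level / base_example - 1)))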

In [11]:
now = datetime.now().strftime("%Y-%m-%d")  # ISO format parses unambiguously in pd.date_range
currentYear = datetime.now().year
extrapolatedDates = pd.date_range(start=now, end='1/1/2040', freq='Q')

y40 = np.add(np.multiply(z2[1] + 40, 1), np.multiply(range(dfcs.shape[0] + 20), z2[0]))

(
    alt.Chart(pd.DataFrame(index=extrapolatedDates, data=y40[-extrapolatedDates.shape[0]:], columns=['fit2']).reset_index()).mark_line(color='blue').encode(
        alt.X('index:T', axis=alt.Axis(title='')),
        alt.Y('fit2:Q', axis=alt.Axis(title=''))
    ) +
    alt.Chart(pd.DataFrame(index=dfcs.index, data=trend2, columns=['fit']).reset_index()).mark_line(color='blue').encode(
        alt.X('date:T', axis=alt.Axis(title='')),
        alt.Y('fit:Q', axis=alt.Axis(title=''))
    ) +    
    alt.Chart(dfcs.reset_index()).mark_line(color='black').encode(
        alt.X('date:T', axis=alt.Axis(title='')),
        alt.Y('CSUSHPINSA:Q', axis=alt.Axis(title='Price Index [2000-Jan = 100]'))
    ) +\
    alt.Chart(pd.DataFrame(index=pd.to_datetime(range(currentYear+1, 2040), format="%Y"), data=dfcs.max()['CSUSHPINSA'], columns=['p']).reset_index()).mark_line(color='black', strokeDash=[5]).encode(
        alt.X('index:T', axis=alt.Axis(title='')),
        alt.Y('p:Q', axis=alt.Axis(title=''))
    )
).properties(
    title='S&P/Case-Shiller U.S. National Home Price Index (stagnation projection)',
    width=750,
    height=400,
    background='white'
)
Out[11]:
In [7]:
extrapolatedDates = pd.date_range(start=now, end='1/1/2035', freq='Q')

base = dfcs.max()['CSUSHPINSA']
decline = [base*(v + 1)**-.05 for v in range(extrapolatedDates.shape[0])]
#print(decline)

y40 = np.add(np.multiply(z2[1] + 25, 1), np.multiply(range(dfcs.shape[0] + 20), z2[0]))

(
    alt.Chart(pd.DataFrame(index=extrapolatedDates, data=y40[-extrapolatedDates.shape[0]:], columns=['fit2']).reset_index()).mark_line(color='blue').encode(
        alt.X('index:T', axis=alt.Axis(title='')),
        alt.Y('fit2:Q', axis=alt.Axis(title=''))
    ) +
    alt.Chart(pd.DataFrame(index=dfcs.index, data=trend2, columns=['fit']).reset_index()).mark_line(color='blue').encode(
        alt.X('date:T', axis=alt.Axis(title='')),
        alt.Y('fit:Q', axis=alt.Axis(title=''))
    ) +    
    alt.Chart(dfcs.reset_index()).mark_line(color='black').encode(
        alt.X('date:T', axis=alt.Axis(title='')),
        alt.Y('CSUSHPINSA:Q', axis=alt.Axis(title='Price Index [2000-Jan = 100]'))
    ) +\
    alt.Chart(pd.DataFrame(index=extrapolatedDates[:-8], data=decline[:-8], columns=['p']).reset_index()).mark_line(color='black', strokeDash=[5]).encode(
        alt.X('index:T'),
        alt.Y('p:Q', axis=alt.Axis(title=''))
    )
).properties(
    title='S&P/Case-Shiller U.S. National Home Price Index (correction projection)',
    width=750,
    height=400,
    background='white'
)
Out[7]:
In [8]:
extrapolatedDates = pd.date_range(start=now, end='1/1/2030', freq='Q')

base = dfcs.max()['CSUSHPINSA']
decline = [base+(base * .011 * v) for v in range(extrapolatedDates.shape[0])]
#print(decline)

y40 = np.add(np.multiply(z2[1] + 11, 1), np.multiply(range(dfcs.shape[0] + 20), z2[0]))

(
    alt.Chart(pd.DataFrame(index=extrapolatedDates, data=y40[-extrapolatedDates.shape[0]:], columns=['fit2']).reset_index()).mark_line(color='blue').encode(
        alt.X('index:T', axis=alt.Axis(title='')),
        alt.Y('fit2:Q', axis=alt.Axis(title=''))
    ) +
    alt.Chart(pd.DataFrame(index=dfcs.index, data=trend2, columns=['fit']).reset_index()).mark_line(color='blue').encode(
        alt.X('date:T', axis=alt.Axis(title='')),
        alt.Y('fit:Q', axis=alt.Axis(title=''))
    ) +    
    alt.Chart(dfcs.reset_index()).mark_line(color='black').encode(
        alt.X('date:T', axis=alt.Axis(title='')),
        alt.Y('CSUSHPINSA:Q', axis=alt.Axis(title='Price Index [2000-Jan = 100]'))
    ) +\
    alt.Chart(pd.DataFrame(index=extrapolatedDates[:-8], data=decline[:-8], columns=['p']).reset_index()).mark_line(color='black', strokeDash=[5]).encode(
        alt.X('index:T', axis=alt.Axis(title='')),
        alt.Y('p:Q', axis=alt.Axis(title=''))
    )
).properties(
    title='S&P/Case-Shiller U.S. National Home Price Index (continued growth projection)',
    width=750,
    height=400,
    background='white'
)
Out[8]:

How have prices changed over the past century when correcting for inflation?

In [13]:
# original source: "http://www.econ.yale.edu/~shiller/data/Fig2-1.xls"

df100yr = pd.read_excel("http://www.econ.yale.edu/~shiller/data/Fig2-1.xls", skiprows=3)

names = [" ".join(map(lambda v: str(v).replace("nan", ""), df100yr[:3][c])).strip() for c in df100yr.columns]

unames = [nm + str(n) if (n != 0 and nm == "Date") else nm for n, nm in enumerate(names)]

df100yr.columns = unames
df100yr = df100yr.iloc[3:]

df100yr2 = df100yr[["Date", "Home Price Index"]].copy()
df100yr2["Date"] = pd.to_datetime(df100yr2["Date"].apply(int).apply(str), format="%Y")
df100yr2["Home Price Index"] = df100yr2["Home Price Index"].apply(float)
df100yr_agg = df100yr2.groupby("Date").agg(np.mean).reset_index()


# start from 2015
# date	CSUSHPINSA
df100yr_agg_rest = dfcs.resample('1Y').last().reset_index().tail(6)
df100yr_agg_rest.columns = ["Date", "Home Price Index"]

df100_yr_ready = pd.concat([df100yr_agg, df100yr_agg_rest])

df_recessions = getSeries("JHDUSRGDPBR", description="value")

c = (
    alt.Chart(df_recessions.reset_index()[["date", "value"]]).mark_bar(color="#D3D3D3").encode(
        alt.X("date:T", axis=alt.Axis(title='')),
        alt.Y("value", axis=alt.Axis(title='', orient="right", labels=False))
    ) +\
    alt.Chart(df100_yr_ready).mark_line(color='darkred').encode(
        alt.X("Date:T", axis=alt.Axis(title='')),
        alt.Y("Home Price Index", axis=alt.Axis(orient="left"))
    )
).properties(
    title="Robert Shiller \"Irrational Exuberance\" Housing Data (inflation adjusted)",
    width=750,
    height=400,
    background='white'
).resolve_scale(y='independent')

c.save('residential-realestate.png')
c.display()
In [14]:
df_pop60 = getSeries("POPTOTUSA647NWDB")

df_pop70 = df_pop60[df_pop60.index > "1969-01-01"]

df_pop70.columns = ["U.S. Population Millions"] + df_pop70.columns.to_list()[1:] 
df_pop70 = df_pop70.reset_index()
df_pop70.columns = ["Date"] + df_pop70.columns.to_list()[1:]

df_pop70["U.S. Population Millions"] = df_pop70["U.S. Population Millions"] / 1e6

df_pop70 = df_pop70.iloc[:, :2]

#df_pop70.head()
In [15]:
df100yr_pop = df_pop70.groupby("Date").agg(np.mean)
df100yr_pop = df100yr_pop.resample("1Y").mean().reset_index()

#df100yr_pop["Date"] = pd.to_datetime(df100yr_pop["Date"])

df100yr_hpi = df100_yr_ready.set_index("Date").resample("1Y").mean().reset_index()

df100yr_pop = df100yr_pop.merge(df100yr_hpi[["Date", "Home Price Index"]], left_on="Date", right_on="Date")

How have prices changed when also correcting for population growth?
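
The next cell divides the inflation-adjusted index by population growth relative to the first year of the merged series, so a doubling of the index against a 60%-larger population nets out to a 25% gain; a worked example with made-up numbers:

hpi_rel_to_base = 2.0   # illustrative: index at 2x its base-year level
pop_rel_to_base = 1.6   # illustrative: population at 1.6x its base-year level
print(hpi_rel_to_base / pop_rel_to_base)  # 1.25 -> +25% once population growth is netted out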

In [16]:
basePop = df100yr_pop["U.S. Population Millions"].values[0]

df100yr2_agg = df100yr_pop.copy()
df100yr2_agg["Demand-Adj"] = df100yr_pop["Home Price Index"] / (df100yr_pop["U.S. Population Millions"] / basePop)

skipbad = (df100yr2_agg["Date"] < pd.to_datetime("1960-12-31")) |\
          (df100yr2_agg["Date"] > pd.to_datetime("1970-12-31"))

coverbad = (df100yr2_agg["Date"].isin([pd.to_datetime("1959-12-31"), pd.to_datetime("1971-12-31")]))

(
    alt.Chart(df100yr2_agg[skipbad]).mark_line().encode(
        alt.X("Date", axis=alt.Axis(title='')),
        alt.Y("Demand-Adj")
    ) +\
    alt.Chart(df100yr2_agg[coverbad]).mark_line(color="white", strokeDash=[5]).encode(
        alt.X("Date", axis=alt.Axis(title='')),
        alt.Y("Demand-Adj")
    )
).properties(
    title="U.S. Historical Housing Price Index (inflation, population adjusted)",
    width=750,
    height=400,
    background='white'
)
Out[16]:
In [17]:
df_new = pd.read_excel("https://www.census.gov/construction/nrs/xls/sold_cust.xls", skiprows=9, header=None).iloc[:-4]

#df_new.tail(20)

df_new_latest = pd.read_excel("https://www.census.gov/construction/nrs/xls/newressales.xls", skiprows=8).iloc[:14, :-1]
df_new_latest.columns = ['Month', 'US-Sold', 'NE-Sold', 'MW-Sold', 'S-Sold', 'W-Sold',
                         'US-Available', 'NE-Available', 'MW-Available', 'S-Available', 'W-Available',
                         'Months-Supply', 'MedianPrice', 'AveragePrice']

# Drop erroneous label rows
df_new_latest = df_new_latest.dropna(how='all')
df_new_latest = df_new_latest[df_new_latest.Month != 2019]

df_new_latest['Month'] = pd.concat([df_new_latest['Month'].iloc[:3].map(lambda v: re.sub('[^A-z]+', '', str(v).split(' ')[0]) + "-2018"),\
                                      df_new_latest['Month'].iloc[3:].map(lambda v: re.sub('[^A-z]+', '', str(v).split(' ')[0]) + "-2019")])

#df_new_latest.head(20)

df_new_latest['dt'] = pd.to_datetime(df_new_latest['Month'], format='%B-%Y', errors='coerce')

df_new_latest = df_new_latest.dropna()

# https://www.census.gov/housing/hvs/data/histtabs.html

df_o = pd.read_excel("http://www.census.gov/housing/hvs/data/histtab19.xlsx", skiprows=3, engine='openpyxl').iloc[3:158]

df_o = df_o.dropna(how='all')

#df_o.head()
In [18]:
vals = []
for i in range(0, df_o.shape[0], 5):
    year = 1994 + int(i / 5)
    vals = vals + [None, "{:d}-Q1".format(year), "{:d}-Q2".format(year), "{:d}-Q3".format(year), "{:d}-Q4".format(year)]
    
#print(vals)

df_o['dt'] = list(map(lambda v: pd.to_datetime(v), vals))
In [19]:
alt.data_transformers.disable_max_rows()

def doRidgeLineFor(df, x, y, row, metric='Home Ownership', step=100, overlap=2, order=[]):
    # Split camelCase metric names into words, e.g. "HomeSales" -> "Home Sales"
    label = re.sub(r"([a-z])([A-Z])", r"\g<1> \g<2>", metric)

    return alt.Chart(df, height=step).transform_joinaggregate(
        mean_val=f'mean({x})', groupby=[row]
    ).mark_area(
        interpolate='monotone',
        fillOpacity=0.8,
        stroke='lightgray',
        strokeWidth=0.5
    ).encode(
        alt.X(f'{x}:T', title=''),
        alt.Y(
            f'{y}:Q',
            scale=alt.Scale(range=[step, -step * overlap]),
            axis=None
        ),
        alt.Fill(
            f'{row}:N',
            title='Category',
            #legend=None,
            scale=alt.Scale(scheme='category20')
        ),
        tooltip=[alt.Tooltip(f'{x}:T'), alt.Tooltip(f'{row}:N'), alt.Tooltip(f'{y}:Q', format=',.02f')]
    ).facet(
        row=alt.Row(
            f'{row}:N',
            title=None,
            header=alt.Header(labelColor='white'), #, labelAnchor='end')
            sort=order
        )
    ).properties(
        title=f"Home Ownership Growth By Age Group",
        bounds='flush'
    ).configure_facet(
        spacing=0
    ).configure_view(
        stroke=None,
        width=650
    ).configure_title(
        anchor='middle'
    )
    

How has home ownership changed by age group since the mid-90s?

In [20]:
sortOrder = ['Under 35 years', '35 to 44 years', '45 to 54 years', 
             '55 to 64 years', '65 years and over', 'U.S.']

validColumns = [c for c in df_o.columns if "Unnamed" not in c]

df_orate = df_o[validColumns].dropna(how='all').iloc[:, 1:].melt(id_vars='dt')

alt.Chart(df_orate).mark_line().encode(
    alt.X('dt:T', axis=alt.Axis(title='')),
    alt.Y('value:Q', axis=alt.Axis(title='Ownership Rate [%]')),
    alt.Color('variable:N', title='Group', sort=sortOrder)
).properties(
    title='Home Ownership Rate By Age Group [Since 1994]',
    width=700,
    height=450
)
Out[20]:
In [21]:
yoy = df_o[validColumns].dropna(how='all').iloc[:, 1:].set_index('dt')
yoy = yoy.pct_change(4).apply(lambda v: v*100.0)

doRidgeLineFor(yoy.reset_index().melt(id_vars='dt'), 'dt', 'value', 'variable', order=sortOrder)
Out[21]:
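Since the ownership series is quarterly, pct_change(4) in the cell above compares each quarter with the same quarter one year earlier; a tiny illustration:

s = pd.Series([64.0, 64.2, 64.5, 64.8, 65.3])
print(s.pct_change(4).iloc[-1] * 100)  # (65.3 - 64.0) / 64.0 * 100 ≈ 2.03% year-over-year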
In [22]:
alt.Chart(df_orate[df_orate.dt > '01-01-2009']).mark_line().encode(
    alt.X('dt:T', axis=alt.Axis(title='')),
    alt.Y('value:Q', axis=alt.Axis(title='Ownership Rate [%]')),
    alt.Color('variable:N', title='Group', sort=sortOrder)
).properties(
    title='Home Ownership [Since 2009]',
    width=700,
    height=450
)
Out[22]:
In [23]:
from pytrends.request import TrendReq

pytrends = TrendReq(hl='en-US', tz=360)


kw_listgone = ["moving company"]
pytrends.build_payload(kw_listgone, cat=0, timeframe='all', geo='', gprop='')

df_ginterestone = pytrends.interest_over_time().reset_index()

kw_listgtwo = ["mortgage preapproval"]
pytrends.build_payload(kw_listgtwo, cat=0, timeframe='all', geo='', gprop='')

df_ginteresttwo = pytrends.interest_over_time().reset_index()

#df['label'] = df.ts.map(lambda v: 'Price [USD]')
#df_interest['label'] = df_interest.date.map(lambda v: 'Interest [Google Trends]')

df_one = pd.concat([df_ginterestone, df_ginterestone[kw_listgone[0]].map(lambda v: kw_listgone[0]).rename('label')], axis=1)
df_two = pd.concat([df_ginteresttwo, df_ginteresttwo[kw_listgtwo[0]].map(lambda v: kw_listgtwo[0]).rename('label')], axis=1)

s1 = alt.Chart(df_one).mark_line(color='black').encode(
    alt.X("date:T", axis=alt.Axis(title=None)),
    alt.Y("{}:Q".format(kw_listgone[0]), axis=alt.Axis(title="")),
    alt.Color('label', legend=alt.Legend(orient="top-right", title=""))
)

s2 = alt.Chart(df_two).mark_line(color='green').encode(
    alt.X("date:T".format(kw_listgtwo[0]), axis=alt.Axis(title=None)),
    alt.Y("{}:Q".format(kw_listgtwo[0]), axis=alt.Axis(title="")),
    alt.Opacity('label', legend=alt.Legend(orient="top-right", title=""))
)

alt.layer(
    s1,
    s2
).resolve_scale(
    y='independent'
).properties(
    title="Google Trends Index for keywords: {}".format(", ".join(kw_listgone + kw_listgtwo)),
    width=750,
    height=400,
    background='white'
)#.interactive()
Out[23]:
In [24]:
kw_listg = ["mortgage preapproval"]
pytrends.build_payload(kw_listg, cat=0, timeframe='all', geo='', gprop='')

df_ginterest = pytrends.interest_over_time().reset_index()

#print(df_ginterest)
df_ginterest.columns = ['date'] + df_ginterest.columns[1:-1].to_list() + ['isPartial']

alt.Chart(df_ginterest[['date'] + kw_listg].melt(id_vars=['date'])).mark_line().encode(
    alt.X("date:T", axis=alt.Axis(title=None)),
    alt.Y("value:Q", axis=alt.Axis(title="Interest Index [Google Trends]")),
    alt.Color("variable:N", title='Google Search'),
    tooltip=[alt.Tooltip("date:T", format="%B %Y")]
).properties(
    title="Google Trends Index for keywords: {}".format(", ".join(kw_listg)),
    width=750,
    height=400,
    background='white'
)
Out[24]:

How does the inventory for US housing compare to the number of households?
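
Beyond plotting the two series side by side, the raw surplus of housing units over households is also worth a look; a minimal sketch reusing the housing_merged frame assembled a few cells below:

surplus = housing_merged['Existing housing units'] - housing_merged['Census-counted households']
print(surplus.tail())  # annual count of housing units in excess of households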

In [25]:
df_us_inventory = pd.read_excel("https://www.census.gov/housing/hvs/data/hist_tab_7a_v2019.xlsx", skiprows=4)

df_us_inventory = df_us_inventory.dropna(how='all').iloc[:14]

df_us_inventory.columns = ["Type"] + [re.sub("[^0-9]", "", str(c)) for c in df_us_inventory.columns[1:]]

df_us_inventory['Type'] = df_us_inventory['Type'].map(lambda v: re.sub("[^A-z-]", "", v.replace(" ", "-")))

df_us_inventory = df_us_inventory[df_us_inventory['Type'] == 'All-housing-units'].T.apply(lambda v: v * 1000).iloc[1:]

df_us_inventory.index = df_us_inventory.index.map(lambda v: pd.to_datetime(v, format="%Y"))

#df_us_inventory
In [26]:
df_households = pd.read_excel("https://www.census.gov/housing/hvs/data/hist_tab_13a_v2019.xlsx", skiprows=4).dropna(how='all')

df_households.columns = ['Year'] + [c for c in df_households.columns[1:]]

df_households['Year'] = df_households['Year'].apply(lambda v: re.sub("[^0-9]", "", str(v)))

df_households = df_households.set_index("Year")

df_households = df_households.applymap(lambda v: v * 1000)

df_households_melted = df_households.reset_index().melt(id_vars='Year').dropna()

df_households_melted['dt'] = pd.to_datetime(df_households_melted['Year'] + "-" + df_households_melted['variable'], format="%Y-%B")

#df_households_melted.head(21)

df_households = df_households_melted[['dt', 'value']]

#df_households
In [27]:
housing_merged = df_households.set_index('dt').merge(df_us_inventory, left_index=True, right_index=True)

housing_merged.columns = ['Census-counted households', 'Existing housing units']

#housing_merged
In [28]:
housing_merged_melted = housing_merged.reset_index().melt(id_vars='index')

#housing_merged_melted
In [29]:
alt.Chart(housing_merged_melted).mark_line().encode(
    alt.X('index:T', axis=alt.Axis(title='')),
    alt.Y('value:Q', axis=alt.Axis(title='Total')),
    alt.Color('variable:N', title='', legend=alt.Legend(orient='top', padding=8)),
    tooltip=[alt.Tooltip('index:T', title='Year'), alt.Tooltip('variable:N', title='Measure'), alt.Tooltip('value:Q', format=",.0f")]
).properties(
    title='Comparison of Total Households (per 2000 Census) and Total Housing Units',
    width=750,
    height=450
)
Out[29]:
In [30]:
housing_merged_yoy = housing_merged.pct_change(1).apply(lambda v: v * 100).dropna()

housing_merged_yoy.columns = ['Household-growth', 'Housing-unit-growth']

housing_merged_yoy_melted = housing_merged_yoy.reset_index().melt(id_vars='index')

alt.Chart(housing_merged_yoy_melted).mark_line().encode(
    alt.X('index:T', axis=alt.Axis(title='')),
    alt.Y('value:Q', axis=alt.Axis(title='Year-over-year Change [%]')),
    alt.Color('variable:N', title='')
).properties(
    title='Comparison of housing demand growth with new supply',
    width=750,
    height=450
)
Out[30]:
In [31]:
dfs = pd.read_excel("https://www.census.gov/construction/nrs/xls/sold_cust.xls", sheet_name='Reg Sold',
                    skiprows=7, usecols=[0, 1, 2, 3, 4, 5])
 
dfs.columns = ['Date', 'United States', 'North-east', 'Mid-West', 'South', 'West']
dfs = dfs.replace('(NA)', np.nan)
dfs = dfs.dropna()

dfs['dt'] = pd.to_datetime(dfs['Date'])

dfs = dfs.drop('Date', axis=1)
In [32]:
valc = alt.Chart(dfs).mark_line(color='black').encode(
    alt.X('dt:T'),
    alt.Y('United States:Q')
).properties(
    title='US New Home Sales',
    width=750,
    height=450
)
In [33]:
dfs_yoy = dfs.set_index('dt').pct_change(12).reset_index()

yoyc = alt.Chart(dfs_yoy).mark_bar(width=2).encode(
    alt.X('dt:T', axis=alt.Axis(title='')),
    alt.Y('United States:Q', axis=alt.Axis(title='Year-over-year Change [%]', format='%')),
    color=alt.condition("datum['United States'] > 0",
        alt.value('steelblue'),
        alt.value('darkred')
    ),
    tooltip=[alt.Tooltip('dt:T', title='Date'), alt.Tooltip('United States:Q', title='% Change', format='.0%')]
).properties(
    title='US New Home Sales',
    width=750,
    height=450
)

valc & yoyc
Out[33]:
In [34]:
# Housing Starts
dfs_s = pd.read_excel("https://www.census.gov/construction/nrc/xls/starts_cust.xls",
                      sheet_name='StartsUA ', skiprows=6)

s_cols = ['Date',
          'AllUnits', '1Unit', '2to4Units', '5PlusUnits',
          'NE-AllUnits', 'NE1Unit',
          'MW-AllUnits', 'MW1Unit',
          'S-AllUnits', 'S1Unit',
          'W-AllUnits', 'W1Unit']

dfs_s.columns = s_cols

dfs_s = dfs_s.replace('(NA)', np.nan)
#dfs_s = dfs_s.dropna()

dfs_s['dt'] = pd.to_datetime(dfs_s['Date'], errors='coerce')
dfs_s = dfs_s[~pd.isna(dfs_s['dt'])]

dfs_s = dfs_s.drop('Date', axis=1)
#dfs_s.keys()
In [35]:
alt.Chart(dfs_s).mark_line().encode(
    alt.X('dt:T', axis=alt.Axis(title='')),
    alt.Y('AllUnits:Q'),
    tooltip=[alt.Tooltip('dt:T', title='Date'), alt.Tooltip('AllUnits:Q', title='Units started')]
).properties(
    title='US New Residential Unit Starts',
    width=750,
    height=450
)
Out[35]:
In [36]:
dfs_s_yoy = dfs_s.set_index('dt').pct_change(12).reset_index()

alt.Chart(dfs_s_yoy[-12*20:]).mark_bar(size=2).encode(
    alt.X('dt:T', axis=alt.Axis(title='')),
    alt.Y('AllUnits:Q', axis=alt.Axis(title='Year-over-year Change [%]', format='%')),
    color=alt.condition("datum['AllUnits'] > 0",
        alt.value('steelblue'),
        alt.value('darkred')
    ),
    tooltip=[alt.Tooltip('dt:T', title='Date'), alt.Tooltip('AllUnits:Q', title='Growth', format='.1%')]
).properties(
    title='US New Residential Unit Start Growth',
    width=750,
    height=450
)
Out[36]:
In [37]:
# Completions

dfs_comp_raw = pd.read_excel('https://www.census.gov/construction/nrc/xls/co_cust.xls', sheet_name=None)
In [38]:
dfs_comp = dfs_comp_raw['CompsUA']
dfs_comp = dfs_comp.iloc[8:]
dfs_comp.columns = s_cols
dfs_comp = dfs_comp.replace('(NA)', np.nan)
dfs_comp['dt'] = pd.to_datetime(dfs_comp['Date'], errors='coerce')
#dfs_comp = dfs_comp[~pd.isna(dfs_comp['dt'])]

dfs_comp = dfs_comp.drop('Date', axis=1)

#dfs_comp.tail(20)
In [39]:
alt.Chart(dfs_comp).mark_line().encode(
    alt.X('dt:T', axis=alt.Axis(title='')),
    alt.Y('AllUnits:Q'),
    tooltip=[alt.Tooltip('dt:T', title='Date'), alt.Tooltip('AllUnits:Q', title='Units completed')]
).properties(
    title='US New Residential Unit Completions',
    width=750,
    height=450
)
Out[39]:
In [40]:
from time import sleep
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
import selenium.webdriver.support.expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

def ajaxComplete(driver):
    try:
        return 0 == driver.execute_script("return jQuery.active")
    except WebDriverException:
        pass
    
chrome_options = Options()
chrome_options.add_argument("--disable-extensions")
chrome_options.add_argument("--disable-gpu")
#chrome_options.add_argument("--headless")
#chrome_options.add_argument("download.default_directory=/tmp")
chrome_options.add_experimental_option("prefs", { "download.default_directory" : "/tmp" })

# https://redfin-public-data.s3-us-west-2.amazonaws.com/redfin_covid19/weekly_housing_market_data_most_recent.tsv

baseUrl = "https://public.tableau.com/views/RedfinDataCentertrend/{0}?%3AshowVizHome=no"

metricFilenameLUT = {
    "HomeSales": { "filename": "sales", "label": "Average Homes Sold" },
    "MedianSalePrice":  { "filename": "med_sale_price", "label": "Median Sale Price" },
    "Inventory": { "filename": "inv", "label": "Number of Homes"},
    "PriceDrops": { "filename": "pd", "label": "Percent with Drops"},
    "PricePerSquareFoot": { "filename": "ppsf", "label": "Price per sqft"},
}

topMetros = [
    "Dallas, TX metro area",
    "Houston, TX metro area",
    "Miami, FL metro area",
    "Atlanta, GA metro area",
    "Phoeniz, AZ metro area",
    "San Francisco, CA metro area",
    "Detroit, MI metro area",
    "Minneapolis, MN metro area",
    "San Diego, CA metro area",
    "Tampa, FL metro area",
    "Denver, CO metro area",
    "St. Louis, MO metro area",
    "Baltimore, MD metro area",
    "Charlotte, NC metro area",
    "Orlando, FL metro area",
    "San Antonio, TX metro area",
    "Portland, OR metro area",
    "Pittsburgh, PA metro area",
    "Las Vegas, NV metro area",
    "Austin, TX metro area",
    "Cincinnati, OH metro area",
    "Kansas City, MO metro area",
    "Columbus, OH metro area",
    "Indianapolis, IN metro area",
    "Cleveland, OH metro area",
    "San Jose, CA metro area",
    "Nashville, TN metro area",
    "Virginia Beach, NC metro area",
    "Providence, RI metro area",
    "Milwaukee, WI metro area",
    "Jacksonville, FL metro area",
    "Oklahoma City, OK metro area",
    "Raleigh, NC metro area",
    "Memphis, TN metro area",
    "Richmod, VA metro area",
    "New Orleans, LA metro area",
    "Louisville, KY metro area",
    "Salt Lake City, UT metro area",
    "Hartford, CT metro area",
    "Buffalo, NY metro area",
    "Birmingham, AL metro area",
]

def addSelection(wait, region="Denver, CO metro area"):
    regionSearchButton = "div.tabComboBoxButtonHolder > span"
    # Click the search filter button
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, regionSearchButton))).click()
    
    regionSearchTextbox = "div[id^='tableau_base_widget_LegacyCategoricalQuickFilter'][id$='menu'] textarea"
    # Fill out the search text box
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, regionSearchTextbox))).send_keys(region)
    
    sleep(6)
    
    # Tab to the region's checkbox and select it
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, regionSearchTextbox))).send_keys(Keys.TAB + " ")
    
    sleep(1)
    
    applyButton = "div[id^='tableau_base_widget_LegacyCategoricalQuickFilter'][id$='menu'] > div.CFApplyButtonContainer > button.tab-button.tab-widget.apply"
    # Click the apply button
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, applyButton))).click()

    #sleep(5)
    
    # Wait for loading to stop
    #wait.until(EC.invisibility_of_element_located((By.CSS_SELECTOR, "div[class*='clear-glass']")))

    # Escape the search filter to close it
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, regionSearchTextbox))).send_keys(Keys.ESCAPE)

def fetch(metric="HomeSales"): 
    driver = webdriver.Chrome(executable_path='/usr/local/bin/chromedriver',
                              chrome_options=chrome_options)

    driver.get(baseUrl.format(metric))
    
    wait = WebDriverWait(driver, 45)
    
    wait.until(ajaxComplete)
    #print("page loaded")
    
    # TODO get to work
    #addSelection(wait, "Denver, CO metro area")
    
    # Wait for the loading glass pane to go away (invisibility_of_element_located takes a locator;
    # staleness_of expects a WebElement, not a locator tuple)
    wait.until(EC.invisibility_of_element_located((By.CSS_SELECTOR, "div[id='loadingGlassPane']")))
    
    downloadButton = ".tab-icon-download"
    # Click the download button
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, downloadButton))).click()
    #print("download button clicked")
    
    # Click the crosstab format
    crosstabButton = "#DownloadDialog-Dialog-Body-Id > div > fieldset > button:nth-child(4)"
    
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, crosstabButton))).click()
    #print("crosstab button clicked")

    # Choose CSV export format
    csvFormat = "#export-crosstab-options-dialog-Dialog-BodyWrapper-Dialog-Body-Id > div > div:nth-child(2) > div:nth-child(2) > div > label:nth-child(2)"
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, csvFormat))).click()
    #print("csv option clicked")
        
    # Start the download
    startDownload = "#export-crosstab-options-dialog-Dialog-BodyWrapper-Dialog-Body-Id > div > div:nth-child(3) > button"
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, startDownload))).click()
    #print("start download clicked")
    
    sleep(10)
    
    driver.close()
    
    try:
        # Case for sales data
        df = pd.read_csv("/tmp/{0}.csv".format(metricFilenameLUT[metric]['filename']), encoding='UTF-16', sep='\t')

        #print(df.iloc[0, 1:])

        df.iloc[:, 1:] = df.iloc[:, 1:].applymap(lambda v: int(v.replace(",", "")))
    except ValueError:
        # Case for median sale price data
        df = pd.read_csv("/tmp/{0}.csv".format(metricFilenameLUT[metric]['filename']),
                         encoding='UTF-16', sep='\t', skiprows=1)

        #print(df.iloc[0, 1:])

        df.iloc[:, 1:] = df.iloc[:, 1:].applymap(lambda v: int(v.replace("K", "000").replace("$", "")))
        
    return df

#df_tmp = fetch("HomeSales")
#fetch("HomeSales").head(10)
In [ ]:
df_rf = pd.read_csv("https://redfin-public-data.s3-us-west-2.amazonaws.com/redfin_covid19/weekly_housing_market_data_most_recent.tsv",
                    sep="\t")
In [ ]:
#df_rf.head()
In [ ]:
metric = "HomeSales"

df_sales = fetch(metric)

df_sales_melted = df_sales.melt(id_vars='Region', var_name='Period', value_name=metricFilenameLUT[metric]['label'])
df_sales_melted = df_sales_melted[df_sales_melted['Region'] == ' National']

df_sales_melted['Year of Period End'] = df_sales_melted['Period'].apply(lambda v: v.split(" ")[-1])
In [ ]:
df_n = pd.DataFrame()
for y in df_sales_melted['Year of Period End'].unique():
    tmp = df_sales_melted[df_sales_melted['Year of Period End'] == y]['Average Homes Sold'].cumsum()
    tmp.index = range(tmp.shape[0])
    tmp.name = y
    df_n = pd.concat([df_n, tmp], axis=1)

monthNames = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
intsToMonths = { i: n for i, n in zip(range(12), monthNames) }
    
df_n = df_n.rename(intsToMonths)
In [ ]:
try:
    df_remelted = df_n[['2021', '2020', '2019', '2018', '2017']].reset_index().melt(id_vars=['index']).sort_values(['variable', 'value']).dropna()
except KeyError:
    df_remelted = df_n[['2020', '2019', '2018', '2017']].reset_index().melt(id_vars=['index']).sort_values(['variable', 'value']).dropna()
    
alt.Chart(df_remelted).mark_line().transform_calculate(
    logged='log(datum.value)'
).encode(
    alt.X('index:O', axis=alt.Axis(title='', labelAngle=0), sort=monthNames),
    alt.Y('value:Q', axis=alt.Axis(title='Cumulative Sales'), scale=alt.Scale(zero=False)),
    alt.Color('variable:N', title='Year'),
    tooltip=[alt.Tooltip('index:O', title='Month'), alt.Tooltip('variable:N', title='Year'),
             alt.Tooltip('value:Q', title='Units', format=',d')]
).properties(
    title=['US Annual Home Sales', 'Source: Redfin'],
    width=750,
    height=450,
    background='white'
)
In [ ]:
df_sales_melted['dt'] = df_sales_melted['Period'].map(lambda v: pd.to_datetime(v, format="%B %Y"))

df_yoy = df_sales_melted.set_index('dt')['Average Homes Sold'].pct_change(12).reset_index()

alt.Chart(df_yoy).mark_bar().encode(
    alt.X('dt:T', axis=alt.Axis(title='', format='%b %Y')),
    alt.Y('Average Homes Sold:Q', axis=alt.Axis(title='Year-over-Year Sales Growth [%]', format='%')),
    color=alt.condition("datum['Average Homes Sold'] < 0",
        alt.value('salmon'),
        alt.value('grey')
    ),
    tooltip=[
        alt.Tooltip('dt:T', title='Date'),
        alt.Tooltip('Average Homes Sold:Q', title='Change', format='.0%')
    ]
).properties(
    title=['US Home Sales Growth', 'Source: Redfin'],
    width=750,
    height=450
)
In [ ]:
metric = "MedianSalePrice"

df_sale_price = fetch(metric)

df_sale_price_melted = df_sale_price.melt(id_vars='Region', var_name='Period', value_name=metricFilenameLUT[metric]['label'])
df_sale_price_melted = df_sale_price_melted[df_sale_price_melted['Region'] == ' National']

df_sale_price_melted['Year of Period End'] = df_sale_price_melted['Period'].apply(lambda v: v.split(" ")[-1])
df_sale_price_melted['dt'] = df_sale_price_melted['Period'].apply(lambda v: pd.to_datetime(v, format='%B %Y'))

#df_sale_price_melted.head()
In [ ]:
alt.Chart(df_sale_price_melted).mark_line().encode(
    alt.X('Period:T', axis=alt.Axis(title='', format='%b %y')),
    alt.Y('Median Sale Price:Q', axis=alt.Axis(title='Median Sale Price [USD]'))
).properties(
    title=['US Home Median Sale Price', 'Source: Redfin'],
    width=750,
    height=450
)
In [ ]:
df_sale_price_yoy = df_sale_price_melted.set_index('dt')['Median Sale Price'].pct_change(12).reset_index()

alt.Chart(df_sale_price_yoy).mark_bar().encode(
    alt.X('dt:T', axis=alt.Axis(title='', format='%b %y')),
    alt.Y('Median Sale Price:Q', axis=alt.Axis(title='Year over Year Change [%]', format='%')),
    color=alt.condition("datum['Median Sale Price'] < 0",
        alt.value('salmon'),
        alt.value('grey')
    ),
    tooltip=[
        alt.Tooltip('dt:T', title='Date'),
        alt.Tooltip('Median Sale Price:Q', title='Change', format='.02%')
    ]
).properties(
    title=['US Home Median Price Growth', 'Source: Redfin'],
    width=750,
    height=450
)
In [ ]:
metric = "Inventory"

df_inv = fetch(metric)

df_inv_melted = df_inv.melt(id_vars='Region', var_name='Period', value_name=metricFilenameLUT[metric]['label'])
df_inv_melted = df_inv_melted[df_inv_melted['Region'] == ' National']

df_inv_melted['Year of Period End'] = df_inv_melted['Period'].apply(lambda v: v.split(" ")[-1])
df_inv_melted['dt'] = df_inv_melted['Period'].apply(lambda v: pd.to_datetime(v, format='%B %Y'))

#df_inv_melted.head()
In [ ]:
alt.Chart(df_inv_melted).mark_line().encode(
    alt.X('Period:T', axis=alt.Axis(title='', format='%b %y')),
    alt.Y('Number of Homes:Q', axis=alt.Axis(title='Homes for Sale'))
).properties(
    title=['US Homes For Sale', 'Source: Redfin'],
    width=750,
    height=450
)
In [ ]:
df_inv_yoy = df_inv_melted.set_index('dt')['Number of Homes'].pct_change(12).reset_index()

alt.Chart(df_inv_yoy).mark_bar().encode(
    alt.X('dt:T', axis=alt.Axis(title='', format='%b %y')),
    alt.Y('Number of Homes:Q', axis=alt.Axis(title='Year-over-Year Change [%]', format='%')),
    color=alt.condition("datum['Number of Homes'] < 0",
        alt.value('salmon'),
        alt.value('grey')
    ),
    tooltip=[
        alt.Tooltip('dt:T', title='Date'),
        alt.Tooltip('Number of Homes:Q', title='Change', format='.02%')
    ]
).properties(
    title=['US Home Inventory Growth', 'Source: Redfin'],
    width=750,
    height=450
)
In [ ]:
# TODO: add selection to export
# TODO: Animated map gif for metros
In [ ]:
"""
df_rdc = pd.read_csv("https://econdata.s3-us-west-2.amazonaws.com/Reports/Hotness/RDC_Inventory_Hotness_Metrics_Metro_History.csv")

df_rdc = df_rdc.dropna()


#df_rdc.head()

import numpy as np

dmetro = df_rdc[df_rdc['cbsa_title'].str.contains(".*denver.*")]

dmetro_melted = dmetro[['month_date_yyyymm', 'hotness_score', 'supply_score', 'demand_score']].melt(id_vars=['month_date_yyyymm'])

dmetro_melted['dt'] = dmetro_melted['month_date_yyyymm'].apply(lambda v: pd.to_datetime(v, format='%Y%m'))

alt.Chart(dmetro_melted).mark_line().encode(
    alt.X('dt:T', axis=alt.Axis(title='', format='%B %Y')),
    alt.Y('value:Q', axis=alt.Axis(title='Score')),
    alt.Color('variable:N', title='Category')
).properties(
    title='Realtor.com Market Hotness: Denver-Aurora-Lakewood, CO',
    width=750,
    height=450,
    background='white'
)
"""
