US Macroeconomic Data Explorer | Quarterly Services Survey

Quarterly Services Survey


A look at the service sector, according to the Quarterly Services Survey from the US Census Bureau.

In [1]:
import pandas as pd
import altair as alt
import numpy as np
import re
from joblib import Memory
import datetime

today_date = datetime.date.today()
currentYear = today_date.year
currentQuarter = int(pd.PeriodIndex([today_date], freq='Q').strftime('%q').values[0])

memory = Memory('data/', verbose=0)

# https://www.census.gov/services/qss/historic_data.html

urlFor = lambda y, q: 'http://www2.census.gov/services/qss/{0}/all_{0}q{1}.xls'.format(y, q) if y < 2019\
                 else 'http://www2.census.gov/services/qss/{0}/all_{0}Q{1}.xls'.format(y, q) if y < 2020 or (y < 2020 and q == 1)\
                 else 'https://www2.census.gov/services/qss/{0}/all_{0}Q{1}.xlsx'.format(y, q)

shouldInclude = lambda y, q: (y < currentYear) or ((y == currentYear) and (q < max([1, currentQuarter - 2])))

# [1:] -> omit Q1 2004, :-1 -> omit QN 20XX
urls = [(urlFor(y, q), y, q) for y in range(2004, currentYear + 1) for q in range(1, 5) if shouldInclude(y, q)][1:]

# TODO - try to automate this
# Manually append the most recent reporting period
#urls = urls + [('https://www.census.gov/services/qss/qss-current.xls', 2020, currentQuarter - 1)]

from time import sleep
from urllib.error import HTTPError


isCurrent = lambda y, q: (y == currentYear) and (q == (currentQuarter - 1))


# Depending on the year and quarter, the most useful sheet name 
# evolves from table1 -> table1B -> table1b -> tableA1
def sheetNameFor(y, q):
    if isCurrent(y, q):
        return 'tableA1-{0}Q{1}'.format(y, q)
    if y <= 2007:
        return 'table1-{0}Q{1}'.format(y, q)
        
    elif y == 2008 and q != 4:
        if q != 4:
            return 'table1-{0}Q{1}'.format(y, q)
        else:
            return 'table1B-{0}Q{1}'.format(y, q)
            
    elif y < 2016:
        return 'table1B-{0}Q{1}'.format(y, q)
      
    elif y == 2016:
        if q != 4:
            return 'table1B-{0}Q{1}'.format(y, q)
        else:
             return 'table1b-{0}Q{1}'.format(y, q)
    else:
        return 'table1b-{0}Q{1}'.format(y, q)

#@memory.cache
def tryFetch(u, retries=5):
    if retries < 0:
        return None
    sleep(1)
    try:
        return (u[1], u[2], pd.read_excel(u[0], sheet_name=sheetNameFor(u[1], u[2])))
    except HTTPError as e:
        #print(e)
        return tryFetch(u, retries-1)
    except Exception as e:
        #print(f"failed: {u}")
        return None
    
dfs = [tryFetch(u, retries=5) for u in urls]

if currentQuarter == 0:
    NOOP = 1
elif currentQuarter == 1:
    try:
        # https://www.census.gov/services/qss/qss-current.xlsx
        q = 4
        current = pd.read_excel('https://www.census.gov/services/qss/qss-current.xlsx',
                                sheet_name=sheetNameFor(currentYear - 1, q), engine='openpyxl')
    except Exception:
        q = 3
        current = pd.read_excel('https://www.census.gov/services/qss/qss-current.xlsx',
                                sheet_name=sheetNameFor(currentYear - 1, q), engine='openpyxl')
    dfs.append((currentYear - 1, q, current))
else:
    try:
        # https://www.census.gov/services/qss/qss-current.xlsx
        q = currentQuarter - 1
        current = pd.read_excel('https://www.census.gov/services/qss/qss-current.xlsx',
                                sheet_name=sheetNameFor(currentYear, q), engine='openpyxl')
    except Exception:
        q = currentQuarter - 2
        try:
            current = pd.read_excel('https://www.census.gov/services/qss/qss-current.xlsx',
                                    sheet_name=sheetNameFor(currentYear, q), engine='openpyxl')
            dfs.append((currentYear, q, current))
        except Exception:
            pass
    
# Depending on the year and quarter, we need to skip differing numbers of "header" rows
def indexFor(y, q):
    if y < 2006:
        return 6
    elif y == 2006 and q < 2:
        return 6
    else:
        return 4

revenues = [(d[0], d[1], d[2].iloc[indexFor(d[0], d[1]):, 0:8]) for d in dfs if d is not None]

#%%time

# To make the various NAICS codes align, we have to cleanup certain codes and mark aggregate categories accordingly
def remapCategory(code):
    if 'pt' in code:
        return re.sub('[^\d]+', '', code) + 'pt'
    elif code in ['11', '21', '22', '23', '31-33', '42', '44-45', '48-49', '51', '52', '53',\
                '54', '55', '56', '61', '62', '71', '72', '81', '92']:
        return code + 'c'
    else:
        return code

cleanIt = lambda s: re.sub('(,$|,\sand.*$)', '', re.sub('\s?\(.*$', '', re.sub('\s*\d+$', '', re.sub('\s+', ' ', s.replace('…', '').replace('.', '').strip()))))

# Depending on the year and quarter we need to provide consistent column headers looking back in time from current
def getPrevious(y, q):
    allLastYear = ['{0}Q{1}'.format(y-1, n+1) for n in reversed(range(4))]
    
    if q == 1:
        return ['{}Q1'.format(y)] + allLastYear
    elif q == 2:
        return ['{}Q2'.format(y)] + ['{}Q1'.format(y)] + allLastYear[:-1]
    elif q == 3:
        return ['{}Q3'.format(y)] + ['{}Q2'.format(y)] + ['{}Q1'.format(y)] + allLastYear[:-2]
    elif q == 4:
        return ['{0}Q{1}'.format(y, n+1) for n in reversed(range(4))] + ['{}Q4'.format(y-1)]

crevs = []
for y, q, df in revenues:
    tmp = df.copy()
    
    try:

        # "Current" report has different columns than historical ones
        if isCurrent(y, q):
            #print(y, q)
            #print("Is Current")
            tmp = tmp.iloc[:, :5]
            tmp.columns = ['Code', 'Business'] + getPrevious(y, q)[:-2]

        else:
            tmp.columns = ['Code', 'Business', '{0}Q{1}YTD'.format(y, q)] + getPrevious(y, q)
            
        tmp['Source'] = tmp.Code.map(lambda c: "{0}Q{1}".format(y, q))

        tmp = tmp[pd.notnull(tmp.Business)]
        tmp = tmp[pd.notna(tmp.Business)]
        tmp = tmp[pd.notnull(tmp.Code)]

        # Exclude records reporting "previous definitions"
        tmp = tmp[tmp.Code.map(lambda c: '*' not in str(c))]

        tmp['Code'] = tmp.Code.map(lambda s: remapCategory(str(s)))

        tmp['Business'] = tmp['Business'].map(lambda s: cleanIt(s) if isinstance(s, str) else s)

        crevs.append(tmp.set_index('Code').T)
        
    except ValueError as e:
        print("Failed", e)
        print(y, q)

# Combine the revised dataframes into one
catrevs = pd.concat(crevs, sort=False)

# Use the first valid identifier as the new column header
codeLut = {}
for c in catrevs.columns:
    codeLut[c] = catrevs[c].loc['Business'].dropna().iloc[0]

# Apply categories markers to each code for aggregation
catrevs.columns = list(map(lambda v: codeLut.get(v) + ' ' + v, catrevs.columns))

catrevs = catrevs[catrevs.index.map(lambda v: v[-3:] != 'YTD' and v not in ['Source', 'Business'])]

catrevs = catrevs[~catrevs.index.isin(['2003Q2', '2003Q3'])]

catrevs = catrevs.apply(pd.to_numeric, errors='coerce')
shiftToQuarterEnd = lambda d: d - pd.tseries.offsets.DateOffset(days=1) + pd.tseries.offsets.QuarterEnd()
catrevs['dt'] = catrevs.index.map(pd.to_datetime).map(shiftToQuarterEnd)

revfinal = catrevs.set_index('dt')
#revfinal.tail(15)

# print out the categories
cats = [c for c in revfinal.columns if 'c' in c[-1]]
#cats

# Imports for reading FRED API
from os import environ
from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen

try:
    # for local execution
    apiKeyFromFile = open("/Users/kyledunn/fredApiKey.txt", "r").read().strip()
except FileNotFoundError:
    apiKeyFromFile = None
    pass
# for CI
apiKey = environ.get("FRED_API_KEY", apiKeyFromFile)

def getSeries(series="", apiKey=apiKey, description=None):
    # Construct URL template, fetching as much data as possible
    fetchCommand = "https://api.stlouisfed.org/fred/series/observations?series_id={s}&realtime_end=9999-12-31&api_key={k}&file_type=txt" 
    
    # Call the FRED API
    resp = urlopen(fetchCommand.format(s=series, k=apiKey))
    
    # Read and extract the data from the Zipfile response
    zipfile = ZipFile(BytesIO(resp.read()))
    filesInZip = zipfile.namelist()
    data = zipfile.open(filesInZip[1])
    
    if description is None:
        description = series
    
    # Make a well-formed dataframe
    df = pd.read_csv(data, sep="\t", header=None, skiprows=1,
                       names=["date", description, "rt_start", "rt_end"], na_values=".")
    
    df['date'] = pd.to_datetime(df.date)
    
    return df.set_index("date")
table10-2020Q2
table11-2020Q2
table12-2020Q2
table13-2020Q2
table14-2020Q2
table15-2020Q2
table16-2020Q2
table17-2020Q2
table18-2020Q2
table19-2020Q2
table1a-2020Q2
table1b-2020Q2
table20-2020Q2
table21-2020Q2
table2-2020Q2
table22-2020Q2
table23-2020Q2
table24-2020Q2
table25-2020Q2
table26-2020Q2
table3-2020Q2
table4-2020Q2
table5-2020Q2
table6-2020Q2
table7-2020Q2
table8-2020Q2
table9-2020Q2
table10-2020Q3
table11-2020Q3
table12-2020Q3
table13-2020Q3
table14-2020Q3
table15-2020Q3
table16-2020Q3
table17-2020Q3
table18-2020Q3
table19-2020Q3
table1a-2020Q3
table1b-2020Q3
table20-2020Q3
table21-2020Q3
table2-2020Q3
table22-2020Q3
table23-2020Q3
table24-2020Q3
table25-2020Q3
table26-2020Q3
table3-2020Q3
table4-2020Q3
table5-2020Q3
table6-2020Q3
table7-2020Q3
table8-2020Q3
table9-2020Q3
table10-2020Q4
table11-2020Q4
table12-2020Q4
table13-2020Q4
table14-2020Q4
table15-2020Q4
table16-2020Q4
table17-2020Q4
table18-2020Q4
table19-2020Q4
table1a-2020Q4
table1b-2020Q4
table20-2020Q4
table21-2020Q4
table2-2020Q4
table22-2020Q4
table23-2020Q4
table24-2020Q4
table25-2020Q4
table26-2020Q4
table3-2020Q4
table4-2020Q4
table5-2020Q4
table6-2020Q4
table7-2020Q4
table8-2020Q4
table9-2020Q4
table10-2021Q1
table11-2021Q1
table12-2021Q1
table13-2021Q1
table14-2021Q1
table15-2021Q1
table16-2021Q1
table17-2021Q1
table18-2021Q1
table19-2021Q1
table1a-2021Q1
table1b-2021Q1
table20-2021Q1
table21-2021Q1
table2-2021Q1
table22-2021Q1
table23-2021Q1
table24-2021Q1
table25-2021Q1
table26-2021Q1
table3-2021Q1
table4-2021Q1
table5-2021Q1
table6-2021Q1
table7-2021Q1
table8-2021Q1
table9-2021Q1
table10-2021Q2
table11-2021Q2
table12-2021Q2
table13-2021Q2
table14-2021Q2
table15-2021Q2
table16-2021Q2
table17-2021Q2
table18-2021Q2
table19-2021Q2
table1a-2021Q2
table1b-2021Q2
table20-2021Q2
table21-2021Q2
table2-2021Q2
table22-2021Q2
table23-2021Q2
table24-2021Q2
table25-2021Q2
table26-2021Q2
table3-2021Q2
table4-2021Q2
table5-2021Q2
table6-2021Q2
table7-2021Q2
table8-2021Q2
table9-2021Q2
table10-2021Q3
table11-2021Q3
table12-2021Q3
table13-2021Q3
table14-2021Q3
table15-2021Q3
table16-2021Q3
table17-2021Q3
table18-2021Q3
table19-2021Q3
table1a-2021Q3
table1b-2021Q3
table20-2021Q3
table21-2021Q3
table2-2021Q3
table22-2021Q3
table23-2021Q3
table24-2021Q3
table25-2021Q3
table26-2021Q3
table3-2021Q3
table4-2021Q3
table5-2021Q3
table6-2021Q3
table7-2021Q3
table8-2021Q3
table9-2021Q3
table10-2021Q4
table11-2021Q4
table12-2021Q4
table13-2021Q4
table14-2021Q4
table15-2021Q4
table16-2021Q4
table17-2021Q4
table18-2021Q4
table19-2021Q4
table1a-2021Q4
table1b-2021Q4
table20-2021Q4
table21-2021Q4
table2-2021Q4
table22-2021Q4
table23-2021Q4
table24-2021Q4
table25-2021Q4
table26-2021Q4
table3-2021Q4
table4-2021Q4
table5-2021Q4
table6-2021Q4
table7-2021Q4
table8-2021Q4
table9-2021Q4
table10-2022Q1
table11-2022Q1
table12-2022Q1
table13-2022Q1
table14-2022Q1
table15-2022Q1
table16-2022Q1
table17-2022Q1
table18-2022Q1
table19-2022Q1
table1a-2022Q1
table1b-2022Q1
table20-2022Q1
table21-2022Q1
table2-2022Q1
table22-2022Q1
table23-2022Q1
table24-2022Q1
table25-2022Q1
table26-2022Q1
table3-2022Q1
table4-2022Q1
table5-2022Q1
table6-2022Q1
table7-2022Q1
table8-2022Q1
table9-2022Q1
table10-2022Q2
table11-2022Q2
table12-2022Q2
table13-2022Q2
table14-2022Q2
table15-2022Q2
table16-2022Q2
table17-2022Q2
table18-2022Q2
table19-2022Q2
table1a-2022Q2
table1b-2022Q2
table20-2022Q2
table21-2022Q2
table2-2022Q2
table22-2022Q2
table23-2022Q2
table24-2022Q2
table25-2022Q2
table26-2022Q2
table3-2022Q2
table4-2022Q2
table5-2022Q2
table6-2022Q2
table7-2022Q2
table8-2022Q2
table9-2022Q2
table10-2022Q3
table11-2022Q3
table12-2022Q3
table13-2022Q3
table14-2022Q3
table15-2022Q3
table16-2022Q3
table17-2022Q3
table18-2022Q3
table19-2022Q3
table1a-2022Q3
table1b-2022Q3
table20-2022Q3
table21-2022Q3
table2-2022Q3
table22-2022Q3
table23-2022Q3
table24-2022Q3
table25-2022Q3
table26-2022Q3
table3-2022Q3
table4-2022Q3
table5-2022Q3
table6-2022Q3
table7-2022Q3
table8-2022Q3
table9-2022Q3
table10-2022Q4
table11-2022Q4
table12-2022Q4
table13-2022Q4
table14-2022Q4
table15-2022Q4
table16-2022Q4
table17-2022Q4
table18-2022Q4
table19-2022Q4
table1a-2022Q4
table1b-2022Q4
table1c-2022Q4
table20-2022Q4
table21-2022Q4
table22-2022Q4
table23-2022Q4
table24-2022Q4
table25-2022Q4
table26-2022Q4
table2a-2022Q4
table2b-2022Q4
table3-2022Q4
table4-2022Q4
table5-2022Q4
table6-2022Q4
table7-2022Q4
table8-2022Q4
table9-2022Q4
table1a-2022Q4
table1b-2022Q4
table1c-2022Q4
table2a-2022Q4
table2b-2022Q4
table1a-2022Q4
table1b-2022Q4
table1c-2022Q4
table2a-2022Q4
table2b-2022Q4

What is the revenue trend compared accross industries?

In [2]:
# Melt and re-aggregate to combine preliminary and final revisions (via average)
tmp = revfinal[cats].reset_index().melt(id_vars=['dt']).copy()
categories_agg = tmp.groupby(['dt', 'variable']).mean().reset_index()

alt.Chart(categories_agg).mark_line().encode(
    alt.X('dt', axis=alt.Axis(title=None)),
    alt.Y('value:Q', axis=alt.Axis(title='Quarterly Revenues [Million USD]')),
    alt.Color('variable:N', title="NAICS Group"),
    tooltip=[alt.Tooltip('dt:T', format="%b %Y"), alt.Tooltip('variable:N'), alt.Tooltip('value:Q', format="$,d")]
).properties(
    title="Quarterly Revenues by NAICS category (Census Quarterly Services Survey)",
    background="white",
    width=750,
    height=450
)
Out[2]:

What proportion of total reported revenues is each industry?

In [3]:
alt.Chart(categories_agg).mark_bar().encode(
    alt.X('dt:T', axis=alt.Axis(title=None)),
    alt.Y('value:Q', axis=alt.Axis(title='Quarterly Revenues [Million USD]')),
    alt.Color('variable:N', title="NAICS Group"),
    tooltip=[alt.Tooltip('dt:T', format="%b %Y"), alt.Tooltip('variable:N'), alt.Tooltip('value:Q', format="$,d")]
).properties(
    title="Quarterly Revenues by NAICS category (Census Quarterly Services Survey)",
    background="white",
    width=750,
    height=450
)
Out[3]:
In [4]:
# TODO:

# - Correct these values for inflation
# - Try to correlate tax revenues per sector with each sectors' contribution to GDP
In [5]:
df_cpi = getSeries("CWUR0000SA0", apiKey)
df_ppi = getSeries("PPIACO", apiKey)

def adjustValueWithDate(val, d):
    date = d + pd.tseries.offsets.DateOffset(days=1)
    valOrVals = (val / (df_ppi.iloc[df_ppi.index.get_loc(date)]['PPIACO'] / 100.))
    return valOrVals if isinstance(valOrVals, np.float64) else valOrVals[-1]

adj_categories_agg = pd.DataFrame.from_records([(dt, variable, value, adjustValueWithDate(value, dt))
                                                for ix, dt, variable, value in categories_agg.itertuples()])
adj_categories_agg.columns = ['dt', 'variable', 'value', 'value_adj']

What is the inflation-adjusted revenue trend compared accross industries?

In [6]:
alt.Chart(adj_categories_agg).mark_line().encode(
    alt.X('dt', axis=alt.Axis(title=None)),
    alt.Y('value_adj:Q', axis=alt.Axis(title='Quarterly Revenues [Million 1982-1984 USD]')),
    alt.Color('variable:N', title="NAICS Group"),
    tooltip=[alt.Tooltip('dt:T', format="%b %Y"), alt.Tooltip('variable:N'), alt.Tooltip('value:Q', format="$,d")]
).properties(
    title="Quarterly Revenues by NAICS category (Census Quarterly Services Survey, PPI-adjusted)",
    width=750,
    height=450
)
Out[6]:
In [7]:
%%capture

def getEmploymentSeries(seriesId="CEU5500000001", name="financial activities"):
    # Finance and insurance
    url = 'https://data.bls.gov/timeseries/{}?amp%222bdata_tool=XGtable&output_view=data&from_year=1990&to_year=2020'
    furl = url.format(seriesId)
    #print(furl)
    df_employment = pd.read_html(furl)[1]
    #print(df_employment.head())
    df_employment.columns = ['Year'] + df_employment.columns.tolist()[1:]
    
    df_emp = df_employment.iloc[:, :-1].melt(id_vars='Year')

    df_emp = df_emp[df_emp.Year != 'P : preliminary']

    df_emp['Date'] = pd.to_datetime(df_emp['variable'] + " " + df_emp['Year'].apply(str), format="%b %Y")
    df_emp[name] = pd.to_numeric(df_emp['value'], errors='coerce')
    
    #print(df_emp.head(1))
    
    return df_emp[['Date', name]].sort_values('Date').set_index('Date').copy()

dfs_employment = []

industries = {
    'goods-producing': 'CEU0600000001',
    'mining and logging': 'CEU1000000001',
    'construction': 'CEU2000000001',
    'manufacturing': 'CEU3000000001',
    'retail trade': 'CEU4200000001',
    'transportation and warehousing': 'CEU4300000001',
    'information': 'CEU5000000001',
    'financial activities': 'CEU5500000001',
    'professional services': 'CEU6000000001',
    'education and health': 'CEU6500000001',
    'government': 'CEU9000000001'
}

for i in industries.keys():
    dfs_employment.append(getEmploymentSeries(industries[i], i))
    
df_employment = pd.concat(dfs_employment, axis=1)

exclude = [
    'Adminstrative and support and waste management 56c',
    'Arts, entertainment 71c',
    'Other services 81c',
    'Utilities 22c',
    'Real estate and rental and leasing 53c',
    'Educational services 61c'
]

def computeDollarsPerCapita(val, variable, date):
    if variable == 'Information 51c':
        return val / df_employment.iloc[df_employment.index.get_loc(pd.to_datetime(date), method='nearest')]['information']
    elif variable == 'Finance and insurance 52c':
        return val / df_employment.iloc[df_employment.index.get_loc(pd.to_datetime(date), method='nearest')]['financial activities']
    elif variable == 'Health care and social assistance 62c':
        return val / df_employment.iloc[df_employment.index.get_loc(pd.to_datetime(date), method='nearest')]['education and health']
    elif variable == 'Professional, scientific 54c':
        return val / df_employment.iloc[df_employment.index.get_loc(pd.to_datetime(date), method='nearest')]['professional services']
    elif variable == 'Transportation and warehousing 48-49c':
        return val / df_employment.iloc[df_employment.index.get_loc(pd.to_datetime(date), method='nearest')]['transportation and warehousing']

revenue_per_employee = pd.DataFrame.from_records([(dt, variable, value, computeDollarsPerCapita(value_adj, variable, dt))
                                                for ix, dt, variable, value, value_adj in adj_categories_agg[~adj_categories_agg.variable.isin(exclude)].itertuples()])

revenue_per_employee.columns = ['dt', 'variable', 'value', 'value_adj']

What is the inflation-adjusted revenue per employee trend compared across industries?

In [8]:
c = alt.Chart(revenue_per_employee).mark_line().encode(
    alt.X('dt', axis=alt.Axis(title=None)),
    alt.Y('value_adj:Q', axis=alt.Axis(title='Quarterly Revenues per Employee [Million USD]')),
    alt.Color('variable:N', title='NAICS Group')
).properties(
    title="Quarterly Revenues per Employee by NAICS category (Census Quarterly Services Survey, PPI-adjusted)",
    background="white",
    width=750,
    height=450
)

c.display()
In [9]:
step = 65
overlap = 3

def doRidgeLineFor(df, x, y, row, title='Quarterly Revenue Growth By Sector [YoY % Change]'):
    return alt.Chart(df, height=step).transform_joinaggregate(
        mean_val=f'mean({x})', groupby=[row]
    ).mark_area(
        interpolate='monotone',
        fillOpacity=0.8,
        stroke='lightgray',
        strokeWidth=0.5
    ).encode(
        alt.X(f'{x}:T', title=''),
        alt.Y(
            f'{y}:Q',
            scale=alt.Scale(range=[step, -step * overlap]),
            axis=None
        ),
        alt.Fill(
            f'{row}:N',
            title='Sector',
            #legend=None,
            scale=alt.Scale(scheme='accent')
        ),
        tooltip=[alt.Tooltip(f'{x}:T'), alt.Tooltip(f'{row}:N'), alt.Tooltip(f'{y}:Q', format=',.02f')]
    ).facet(
        row=alt.Row(
            f'{row}:N',
            title=None,
            header=alt.Header(labelColor='white') #, labelAnchor='end')
        )
    ).properties(
        title=title,
        bounds='flush'
    ).configure_facet(
        spacing=0
    ).configure_view(
        stroke=None,
        width=650
    ).configure_title(
        anchor='middle'
    )
    
categories_yoy = adj_categories_agg.copy().set_index('dt')[['variable', 'value_adj']].pivot(columns='variable')\
            .applymap(float).pct_change(4).applymap(lambda v: v * 100).reset_index().melt(id_vars='dt')

c = doRidgeLineFor(categories_yoy[['dt', 'variable', 'value']].dropna(), 'dt', 'value', 'variable')
c.save('services.png')

c.display()
In [10]:
r_p_e = pd.DataFrame.from_records([(dt, variable, value, computeDollarsPerCapita(value_adj, variable, dt))
                                                for ix, dt, variable, value, value_adj in adj_categories_agg.itertuples()])

r_p_e.columns = ['dt', 'variable', 'value', 'value_adj']

r_p_e_yoy = r_p_e.set_index('dt')[['variable', 'value_adj']].pivot(columns='variable')\
            .applymap(float).pct_change(4).applymap(lambda v: v * 100).reset_index().melt(id_vars='dt')

doRidgeLineFor(r_p_e_yoy[['dt', 'variable', 'value']], 'dt', 'value', 'variable', title='Quarterly Revenue Growth (per employee) By Sector [YoY % Change]')
/tmp/ipykernel_5049/3880904604.py:55: FutureWarning: Passing method to DatetimeIndex.get_loc is deprecated and will raise in a future version. Use index.get_indexer([item], method=...) instead.
  return val / df_employment.iloc[df_employment.index.get_loc(pd.to_datetime(date), method='nearest')]['financial activities']
/tmp/ipykernel_5049/3880904604.py:57: FutureWarning: Passing method to DatetimeIndex.get_loc is deprecated and will raise in a future version. Use index.get_indexer([item], method=...) instead.
  return val / df_employment.iloc[df_employment.index.get_loc(pd.to_datetime(date), method='nearest')]['education and health']
/tmp/ipykernel_5049/3880904604.py:53: FutureWarning: Passing method to DatetimeIndex.get_loc is deprecated and will raise in a future version. Use index.get_indexer([item], method=...) instead.
  return val / df_employment.iloc[df_employment.index.get_loc(pd.to_datetime(date), method='nearest')]['information']
/tmp/ipykernel_5049/3880904604.py:59: FutureWarning: Passing method to DatetimeIndex.get_loc is deprecated and will raise in a future version. Use index.get_indexer([item], method=...) instead.
  return val / df_employment.iloc[df_employment.index.get_loc(pd.to_datetime(date), method='nearest')]['professional services']
/tmp/ipykernel_5049/3880904604.py:61: FutureWarning: Passing method to DatetimeIndex.get_loc is deprecated and will raise in a future version. Use index.get_indexer([item], method=...) instead.
  return val / df_employment.iloc[df_employment.index.get_loc(pd.to_datetime(date), method='nearest')]['transportation and warehousing']
Out[10]:

© kdunn926