US Macroeconomic Data Explorer | US Dairy Production

US Dairy Production


A look at US dairy production, according to the US Department of Agriculture.

In [1]:
import pandas as pd
import altair as alt
import numpy as np
import re
In [2]:
# https://www.ers.usda.gov/data-products/dairy-data/

df_dairy = pd.read_excel("https://www.ers.usda.gov/webdocs/DataFiles/48685/quarterlymilkfactors_1_.xlsx", engine='openpyxl', skiprows=4)

df_dairy.columns = ["YearAndQuarter", "Year", "MilkCows", "productionLbsMilkPerCow", "productionMillionLbs",
                    "NA", "feedDollarPerCwt", "replacementPricePerHead"]

#df_dairy = df_dairy.iloc[:, :-5]

findYearLabels = lambda v: v if isinstance(v, int) or (isinstance(v, str) and re.match("^\d+$", v) is not None) else None

# Shift the year label over one column and forward fill it
df_dairy['Year'] = df_dairy['YearAndQuarter'].map(findYearLabels)
df_dairy['Year'] = df_dairy['Year'].ffill()

# Subset to only quarterly rows
df_dairy = df_dairy[~df_dairy['YearAndQuarter'].str.contains("^(?:\d|Avg)", regex=True, na=True)]

# Parse out the datetime value
df_dairy['dt'] = pd.to_datetime(df_dairy['YearAndQuarter'].map(lambda v: re.sub("[^A-z]", "", v.split("-")[-1])) + df_dairy['Year'].apply(int).apply(str),
                                format="%b%Y", errors='coerce')

# Drop all rows with invalid timestemp and the NA column
df_dairy = df_dairy[~pd.isna(df_dairy['dt'])] #.drop('NA', axis=1)

#df_dairy.tail(10)
In [3]:
alt.Chart(df_dairy).mark_line().encode(
    alt.X('dt:T', axis=alt.Axis(title='')),
    alt.Y('feedDollarPerCwt:Q', axis=alt.Axis(title='Value, 16% protein [$/cwt]'))
).properties(
    title='US Dairy Cow Feed Value (per USDA)',
    width=750,
    height=450
)
Out[3]:
In [4]:
def yoyCowChartFor(df, metric, label, units=''):
    return alt.Chart(df).mark_bar().transform_window(
        window=[alt.WindowFieldDef("lag", metric, param=12, **{ 'as': 'previous' })],
        sort=[{"field": "dt"}],
    ).transform_calculate(
        yoy=f"((datum['{metric}'] - datum.previous) / datum.previous) * 100"
    ).transform_filter(
        "datum.yoy != -100"
    ).encode(
        alt.X('dt:T', axis=alt.Axis(title='')),
        alt.Y('yoy:Q', axis=alt.Axis(title=f'{label} [Year-over-year % change]')),
        color=alt.condition(f"datum.yoy < 0",
            alt.value('lightsalmon'),
            alt.value('royalblue')
        ),
        tooltip=[alt.Tooltip('dt:T', title='Date'), alt.Tooltip('yoy:Q', title='% Change', format=',.1f')]
    ).properties(
        title=f'US {label} (per USDA)',
        width=750,
        height=450
    )

yoyCowChartFor(df_dairy, metric='feedDollarPerCwt', label="Dairy Cow Feed Value", units='$/cwt')
Out[4]:
In [5]:
alt.Chart(df_dairy).mark_line().transform_calculate(
    cows='datum.MilkCows < 10 ? NaN : datum.MilkCows * 1000'
).encode(
    alt.X('dt:T', axis=alt.Axis(title='')),
    alt.Y('cows:Q', axis=alt.Axis(title='Cows'))
).properties(
    title='US Dairy Cows (per USDA)',
    width=750,
    height=450
)
Out[5]:
In [6]:
yoyCowChartFor(df_dairy, metric='MilkCows', label="Dairy Cow Population", units='cows')
Out[6]:
In [7]:
c = alt.Chart(df_dairy).mark_line().encode(
    alt.X('dt:T', axis=alt.Axis(title='')),
    alt.Y('productionMillionLbs:Q', axis=alt.Axis(title='Milk [million-lbs]'))
).properties(
    title='US Dairy Cow Production (per USDA)',
    width=750,
    height=450
)

c.save('dairy.png')

c.display()
In [8]:
yoyCowChartFor(df_dairy, metric='productionMillionLbs', label="Dairy Cow Milk Production", units='million-lbs')
Out[8]:
In [9]:
df_dairy_products_raw = pd.read_excel("https://www.ers.usda.gov/webdocs/DataFiles/48685/CmDsProd.xlsx", engine='openpyxl',
                                      sheet_name=None)

# Subset the workbook for only the monthly data
df_dairy_products_dict = { k: s for k, s in df_dairy_products_raw.items() if "monthly" in k }
Amer cheese, monthly
Dry skim milk, monthly
Dry whey, monthly
Lactose, monthly
Other cheese, monthly
WPC, monthly
Butter, monthly
In [10]:
butterCols = ['Year', 'Month', 'Beginning-commercial-stocks', 'Production',
       'Imports', 'Total supply', "USDA-net-removals-barters-CCC-donations",
       'Domestic-commercial-disappearance', 'Commercial-exports',
       'Total-commercial-disappearance', 'Ending-commercial-stocks', 'NA']

try:
    df_dairy_products_dict['Butter, monthly'].columns = butterCols
except ValueError:
    df_dairy_products_dict['Butter, monthly'].columns = butterCols[:-1]
    
df_dairy_products_dict['Butter, monthly']['Year'] = df_dairy_products_dict['Butter, monthly']['Year'].ffill()

#df_dairy_products_dict['Butter, monthly'].iloc[2:].head(15)

#df_dairy_products_dict['Butter, monthly'].head(15)
In [11]:
alt.Chart(df_dairy_products_dict['Butter, monthly']).mark_line().transform_calculate(
    dt="timeParse(datum.Year + ' ' + datum.Month, '%Y %b')"
).encode(
    alt.X('dt:T', axis=alt.Axis(title='')),
    alt.Y('Production:Q', axis=alt.Axis(title='Production [million-lbs]'))
).properties(
    title='US Butter Production (per USDA)',
    width=750,
    height=450
)
Out[11]:
In [12]:
skimMilkCols = ["Year", "Month", 'Supply-Nonfat-dry-milk-NDM',
       'Supply-Dry-skim-milk-for-animal-use', 'Supply-Total', 'Production-NDM',
       'Production-Skim milk powder', 'Production-Dry-skim-milk-for-animal-use',
       'Production-Total', "Imports", "Total-supply", "USDA-net-removals-barters-CCC-donations",
       'Commercial-Domestic-disappearance', 'Commercial-exports', 'Commercial-Total-disappearance', 
       'Ending-stocks-NDM', 'Ending-stocks-Dry-skim-milk-for-animal-use', 'Ending-stocks-Total']

df_dairy_products_dict['Dry skim milk, monthly'].columns = skimMilkCols
df_dairy_products_dict['Dry skim milk, monthly'] = df_dairy_products_dict['Dry skim milk, monthly'].iloc[3:].copy()
df_dairy_products_dict['Dry skim milk, monthly']['Year'] = df_dairy_products_dict['Dry skim milk, monthly']['Year'].ffill()

#df_dairy_products_dict['Dry skim milk, monthly'].head(10)
In [13]:
alt.Chart(df_dairy_products_dict['Dry skim milk, monthly']).mark_line().transform_calculate(
    dt="timeParse(datum.Year + ' ' + datum.Month, '%Y %b')"
).encode(
    alt.X('dt:T', axis=alt.Axis(title='')),
    alt.Y('Production-Total:Q', axis=alt.Axis(title='Production [million-lbs]'))
).properties(
    title='US Dry Skim Milk Production (per USDA)',
    width=750,
    height=450
)
Out[13]:
In [14]:
amCheeseCols = ["Year", "Month", "Beginning-commercial-stocks", "Production", "Imports", "Total-supply",
                "USDA-net-removals-barters-CCC-donations", "Domestic-commercial-disappearance",
                "Commercial-exports", "commercial-disappearance", "Ending commercial stocks"]

df_dairy_products_dict['Amer cheese, monthly'].columns = amCheeseCols
df_dairy_products_dict['Amer cheese, monthly'] = df_dairy_products_dict['Amer cheese, monthly'].iloc[2:].copy()
df_dairy_products_dict['Amer cheese, monthly']['Year'] = df_dairy_products_dict['Amer cheese, monthly']['Year'].ffill()

#df_dairy_products_dict['Amer cheese, monthly'].head(10)
In [15]:
alt.Chart(df_dairy_products_dict['Amer cheese, monthly']).mark_line().transform_calculate(
    dt="timeParse(datum.Year + ' ' + datum.Month, '%Y %b')"
).encode(
    alt.X('dt:T', axis=alt.Axis(title='')),
    alt.Y('Production:Q', axis=alt.Axis(title='Production [million-lbs]'))
).properties(
    title='US American Cheese Production (per USDA)',
    width=750,
    height=450
)
Out[15]:
In [16]:
otherCheeseCols = ["Year", "Month", "Beginning-commercial-stocks", "Production", "Imports", "Total-supply",
                   "USDA-net-removals-barters-CCC-donations", "Domestic-commercial-disappearance",
                   "Commercial-exports", "Total-commercial-disappearance", "Ending-commercial-stocks"]

df_dairy_products_dict['Other cheese, monthly'].columns = otherCheeseCols
df_dairy_products_dict['Other cheese, monthly'] = df_dairy_products_dict['Other cheese, monthly'].iloc[2:].copy()
df_dairy_products_dict['Other cheese, monthly']['Year'] = df_dairy_products_dict['Other cheese, monthly']['Year'].ffill()

#df_dairy_products_dict['Other cheese, monthly'].head(10)
In [17]:
alt.Chart(df_dairy_products_dict['Other cheese, monthly']).mark_line().transform_calculate(
    dt="timeParse(datum.Year + ' ' + datum.Month, '%Y %b')"
).encode(
    alt.X('dt:T', axis=alt.Axis(title='')),
    alt.Y('Production:Q', axis=alt.Axis(title='Production [million-lbs]'))
).properties(
    title='US Other Cheese Production (per USDA)',
    width=750,
    height=450
)
Out[17]:
In [18]:
market = df_dairy_products_dict['Dry whey, monthly'].iloc[0].ffill()
category = df_dairy_products_dict['Dry whey, monthly'].iloc[1].ffill(limit=2)
usage = df_dairy_products_dict['Dry whey, monthly'].iloc[2]

noNans = lambda v: filter(lambda s: isinstance(s, str) and s != "nan", list(v))
onlyChars = lambda v: re.sub("[^A-z ]", "", str(v).strip())

wheyCols = ["-".join(map(onlyChars, noNans(c))).replace(" -", "-").replace(" ", "-") for c in zip(market, category, usage)]

df_dairy_products_dict['Dry whey, monthly'].columns = wheyCols
df_dairy_products_dict['Dry whey, monthly'] = df_dairy_products_dict['Dry whey, monthly'].iloc[3:310].copy()
df_dairy_products_dict['Dry whey, monthly']['Year'] = df_dairy_products_dict['Dry whey, monthly']['Year'].ffill()

#df_dairy_products_dict['Dry whey, monthly']
In [19]:
alt.Chart(df_dairy_products_dict['Dry whey, monthly']).mark_line().transform_calculate(
    dt="timeParse(datum.Year + ' ' + datum.Month, '%Y %b')"
).encode(
    alt.X('dt:T', axis=alt.Axis(title='')),
    alt.Y('Supply-Production-Total:Q', axis=alt.Axis(title='Production [million-lbs]'))
).properties(
    title='US Whey Production (per USDA)',
    width=750,
    height=450
)
Out[19]:
In [20]:
df_dairy_products_dict['WPC, monthly'].columns = wheyCols
df_dairy_products_dict['WPC, monthly'] = df_dairy_products_dict['WPC, monthly'].iloc[3:310].copy()
df_dairy_products_dict['WPC, monthly']['Year'] = df_dairy_products_dict['WPC, monthly']['Year'].ffill()

#df_dairy_products_dict['WPC, monthly']
In [21]:
alt.Chart(df_dairy_products_dict['WPC, monthly']).mark_line().transform_calculate(
    dt="timeParse(datum.Year + ' ' + datum.Month, '%Y %b')"
).encode(
    alt.X('dt:T', axis=alt.Axis(title='')),
    alt.Y('Supply-Production-Total:Q', axis=alt.Axis(title='Production [million-lbs]'))
).properties(
    title='US Whey Protein Concentrate Production (per USDA)',
    width=750,
    height=450
)
Out[21]:
In [22]:
#'Lactose, monthly'

lactoseCols = ["Year", "Month", "Beginning manufacturers' stocks", "Production", "Imports", "Total-supply",
               "Domestic-commercial-disappearance", "Commercial-exports", "Total-commercial-disappearance",
               "Ending-manufacturers-stocks", "NA1", "NA2", "NA3"]

try:
    df_dairy_products_dict['Lactose, monthly'].columns = lactoseCols
except ValueError:
    df_dairy_products_dict['Lactose, monthly'].columns = lactoseCols[:-3]
    
df_dairy_products_dict['Lactose, monthly'] = df_dairy_products_dict['Lactose, monthly'].iloc[2:309].copy()
df_dairy_products_dict['Lactose, monthly'].loc[:, 'Year'] = df_dairy_products_dict['Lactose, monthly']['Year'].ffill()

#df_dairy_products_dict['Lactose, monthly']
In [23]:
alt.Chart(df_dairy_products_dict['Lactose, monthly']).mark_line().transform_calculate(
    dt="timeParse(datum.Year + ' ' + datum.Month, '%Y %b')"
).encode(
    alt.X('dt:T', axis=alt.Axis(title='')),
    alt.Y('Production:Q', axis=alt.Axis(title='Production [million-lbs]'))
).properties(
    title='US Lactose Production (per USDA)',
    width=750,
    height=450
)
Out[23]:
In [24]:
def yoyChartFor(df, sheet, metric, label='Lactose Production'):
    return alt.Chart(df[sheet]).mark_bar(size=2).transform_calculate(
        dt="timeParse(datum.Year + ' ' + datum.Month, '%Y %b')",
    ).transform_window(
        window=[alt.WindowFieldDef("lag", metric, param=12, **{ 'as': 'previous' })],
        sort=[{"field": "dt"}],
    ).transform_calculate(
        yoy=f"((datum['{metric}'] - datum.previous) / datum.previous) * 100"
    ).encode(
        alt.X('dt:T', axis=alt.Axis(title='')),
        alt.Y('yoy:Q', axis=alt.Axis(title=f'{label} [Year-over-year % change]')),
        color=alt.condition(f"datum.yoy < 0",
            alt.value('lightsalmon'),
            alt.value('royalblue')
        ),
        tooltip=[alt.Tooltip('yoy:Q', title='% Change', format=',.1f')]
    ).properties(
        title=f'US {label} (per USDA)',
        width=750,
        height=450
    )

yoyChartFor(df_dairy_products_dict, 'Lactose, monthly', 'Production', label='Lactose Production')
Out[24]:
In [25]:
yoyChartFor(df_dairy_products_dict, 'WPC, monthly', 'Supply-Production-Total', label='WPC Production')
Out[25]:
In [26]:
yoyChartFor(df_dairy_products_dict, 'Dry whey, monthly', 'Supply-Production-Total', label='Dry whey Production')
Out[26]:
In [27]:
yoyChartFor(df_dairy_products_dict, 'Other cheese, monthly', 'Production', label='Other cheese Production')
Out[27]:
In [28]:
yoyChartFor(df_dairy_products_dict, 'Amer cheese, monthly', 'Production', label='American cheese Production')
Out[28]:
In [29]:
yoyChartFor(df_dairy_products_dict, 'Dry skim milk, monthly', 'Production-Total', label='Dry Skim Milk Production')
Out[29]:
In [30]:
yoyChartFor(df_dairy_products_dict, 'Butter, monthly', 'Production', label='Butter Production')
Out[30]:

© kdunn926