US Macroeconomic Data Explorer | Electricity Consumption

Electricity Consumption


A look at electricity consumption, according to the Monthly Electric Power Industry Report (Form EIA-861M) from U.S. Energy Information Administration.

In [1]:
import pandas as pd
import numpy as np
import altair as alt
In [2]:
from os import environ

# Load the FRED API key - register for one here: 
# https://research.stlouisfed.org/useraccount/login/secure/
try:
    # for local execution
    apiKeyFromFile = open("/Users/kyledunn/fredApiKey.txt", "r").read().strip()
except FileNotFoundError:
    apiKeyFromFile = None
    pass
# for CI
apiKey = environ.get("FRED_API_KEY", apiKeyFromFile)

from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen
    
def getSeries(series="", description=None, apiKey=apiKey):
    """
    Fetch a data series from the St. Louis FED system (FRED)
    and return it as a Pandas dataframe indexed by date
    """
    baseUrl = "https://api.stlouisfed.org"
    endpoint = "/fred/series/observations?series_id={s}&realtime_end=9999-12-31&api_key={k}&file_type=txt" 
    
    response = urlopen(baseUrl + endpoint.format(s=series, k=apiKey))
    zipfile = ZipFile(BytesIO(response.read()))
    
    filesInZip = zipfile.namelist()
    
    data = zipfile.open(filesInZip[1])

    if description is None:
        description = series
        
    df = pd.read_csv(data, sep="\t", header=None, skiprows=1,
                       names=["date", description, "rt_start", "rt_end"], na_values=".")
    
    df['date'] = pd.to_datetime(df.date)
    
    return df.set_index("date")
In [3]:
df_us_gdp = getSeries("GDPC1", description="GDP").resample("1Y").last()
In [4]:
df_elec = pd.read_excel("https://www.eia.gov/electricity/data/eia861m/xls/sales_revenue.xlsx", engine='openpyxl')

levels = []
for c in df_elec.columns:
    if "Unnamed" in c:
        continue
    else:
        for i in range(4):
            levels.append(c.title())
            
categories = [(l, "{} [{}]".format(m, u)) for m, u, l in zip(*df_elec.iloc[0:2, 4:].values, levels)]

df_elec = df_elec.set_index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3'])
df_elec = df_elec.rename_axis(["Year", "Month", "State", "Status"])

df_elec.columns = pd.MultiIndex.from_tuples(categories)
df_elec = df_elec.iloc[2:-1]
In [5]:
%%capture

df_e = df_elec.reset_index(level=0)
columns = [df_e.columns.to_list()[0], df_e.columns.to_list()[-3]]

df_megawatts = df_e[columns].groupby("Year").agg("sum")
df_megawatts.columns = ['Megawatt-hours']
df_megawatts.index = pd.to_datetime(df_megawatts.index, format="%Y")
In [6]:
c = alt.Chart(df_megawatts.reset_index()).mark_line().encode(
    alt.X('Year:T', axis=alt.Axis(title='')),
    alt.Y('Megawatt-hours:Q')
).properties(
    title='Total US Electricity Consumption (all sectors)',
    height=450,
    width=700,
    background='white'
)

c.save("electricity.png")

c.display()
In [7]:
step = 120
overlap = 1

def doRidgeLineFor(df, x, y, row, title='US Growth of Electricity Consumption by Sector'):
    return alt.Chart(df, height=step).transform_joinaggregate(
        mean_val=f'mean({x})', groupby=[row]
    ).mark_area(
        interpolate='monotone',
        fillOpacity=0.9,
        stroke='lightgray',
        strokeWidth=0.5
    ).encode(
        alt.X(f'{x}:T', title=''),
        alt.Y(
            f'{y}:Q',
            scale=alt.Scale(range=[step, -step * overlap]),
            axis=None
        ),
        alt.Fill(
            f'{row}:N',
            title='Sector',
            #legend=None,
            scale=alt.Scale(scheme='accent')
        ),
        tooltip=[alt.Tooltip(f'{x}:T'), alt.Tooltip(f'{row}:N'), alt.Tooltip(f'{y}:Q', format=',.02f')]
    ).facet(
        row=alt.Row(
            f'{row}:N',
            title=None,
            header=alt.Header(labelColor='white') #, labelAnchor='end')
        )
    ).properties(
        title=title,
        bounds='flush'
    ).configure_facet(
        spacing=0
    ).configure_view(
        stroke=None,
        width=750
    ).configure_title(
        anchor='middle'
    )
    
df_elec_yoy = df_e.copy().reset_index()

sectors = [c for c in df_e.columns.to_list() if "Sales" in c[1] and c[0] not in ['Total', 'Transportation', 'Other']]

df_elec_yoy['dt'] = pd.to_datetime(df_elec_yoy['Year'].apply(str) + '-' + df_elec_yoy['Month'].apply(str), format="%Y-%m")

df_elec_yoy[sectors] = df_elec_yoy[sectors].applymap(lambda v: pd.to_numeric(v, errors='coerce'))

df_elec_yoy = df_elec_yoy.set_index('dt')[sectors]\
                    .groupby(["dt"])\
                    .agg(sum)\
                    .pct_change(12)\
                    .apply(lambda v: v * 100)\
                    .reset_index()

"""    
c = alt.Chart(df_elec_yoy.melt(id_vars='dt').dropna()).mark_bar(width=1.5).encode(
    alt.X('dt:T', axis=alt.Axis(title='')),
    alt.Y('value:Q', axis=alt.Axis(title='Year-over-year Growth [%]')),
    alt.Row('variable_0:N', title='Sector'),
    alt.Color('variable_0:N', title='Sector'),
    tooltip=[alt.Tooltip('dt:T', format='%b %Y'), alt.Tooltip('value:Q', format=",.02")]
).properties(
    title='US Growth of Electricity Consumption by Sector',
    width=700,
    background='white'
)
""";

c = doRidgeLineFor(df_elec_yoy.melt(id_vars='dt')[['dt', 'variable_0', 'value']].dropna(), 'dt', 'value', 'variable_0')
c.save('electricity.png')
In [8]:
c.display()
In [9]:
cmp = df_megawatts.resample("1Y").last().join(df_us_gdp)
cmp['year'] = cmp.index.map(lambda d: d.year)
cmp['decade'] = cmp.index.map(lambda v: "19{}0's".format(str(v)[2]) if v.year < 2000 else "20{}0's".format(str(v)[2]))

tegdp = alt.Chart(cmp.reset_index()).mark_point().encode(
    alt.Y('Megawatt-hours', axis=alt.Axis(grid=False, title='Consumption [Megawatt-hours]'), scale=alt.Scale(zero=False, padding=35)),
    alt.X('GDP', axis=alt.Axis(grid=False, title="GDP [USD]"), scale=alt.Scale(zero=False, padding=35)),
    alt.Color('decade:N', title='Decade'),
).properties(
    title="Total Electricity Consumption vs U.S. Real Gross Domestic Product",
    width=1000,
    height=500
)

text = tegdp.mark_text(
    align='left',
    baseline='middle',
    dx=7,
    size=10
).encode(
    text='year:N'
)

tegdp + text
Out[9]:

© kdunn926