A look at electricity consumption, according to the Monthly Electric Power Industry Report (Form EIA-861M) from U.S. Energy Information Administration.
import pandas as pd
import numpy as np
import altair as alt
from os import environ
# Load the FRED API key - register for one here:
# https://research.stlouisfed.org/useraccount/login/secure/
try:
# for local execution
apiKeyFromFile = open("/Users/kyledunn/fredApiKey.txt", "r").read().strip()
except FileNotFoundError:
apiKeyFromFile = None
pass
# for CI
apiKey = environ.get("FRED_API_KEY", apiKeyFromFile)
from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen
def getSeries(series="", description=None, apiKey=apiKey):
"""
Fetch a data series from the St. Louis FED system (FRED)
and return it as a Pandas dataframe indexed by date
"""
baseUrl = "https://api.stlouisfed.org"
endpoint = "/fred/series/observations?series_id={s}&realtime_end=9999-12-31&api_key={k}&file_type=txt"
response = urlopen(baseUrl + endpoint.format(s=series, k=apiKey))
zipfile = ZipFile(BytesIO(response.read()))
filesInZip = zipfile.namelist()
data = zipfile.open(filesInZip[1])
if description is None:
description = series
df = pd.read_csv(data, sep="\t", header=None, skiprows=1,
names=["date", description, "rt_start", "rt_end"], na_values=".")
df['date'] = pd.to_datetime(df.date)
return df.set_index("date")
df_us_gdp = getSeries("GDPC1", description="GDP").resample("1Y").last()
df_elec = pd.read_excel("https://www.eia.gov/electricity/data/eia861m/xls/sales_revenue.xlsx", engine='openpyxl')
levels = []
for c in df_elec.columns:
if "Unnamed" in c:
continue
else:
for i in range(4):
levels.append(c.title())
categories = [(l, "{} [{}]".format(m, u)) for m, u, l in zip(*df_elec.iloc[0:2, 4:].values, levels)]
df_elec = df_elec.set_index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3'])
df_elec = df_elec.rename_axis(["Year", "Month", "State", "Status"])
df_elec.columns = pd.MultiIndex.from_tuples(categories)
df_elec = df_elec.iloc[2:-1]
%%capture
df_e = df_elec.reset_index(level=0)
columns = [df_e.columns.to_list()[0], df_e.columns.to_list()[-3]]
df_megawatts = df_e[columns].groupby("Year").agg("sum")
df_megawatts.columns = ['Megawatt-hours']
df_megawatts.index = pd.to_datetime(df_megawatts.index, format="%Y")
c = alt.Chart(df_megawatts.reset_index()).mark_line().encode(
alt.X('Year:T', axis=alt.Axis(title='')),
alt.Y('Megawatt-hours:Q')
).properties(
title='Total US Electricity Consumption (all sectors)',
height=450,
width=700,
background='white'
)
c.save("electricity.png")
c.display()
step = 120
overlap = 1
def doRidgeLineFor(df, x, y, row, title='US Growth of Electricity Consumption by Sector'):
return alt.Chart(df, height=step).transform_joinaggregate(
mean_val=f'mean({x})', groupby=[row]
).mark_area(
interpolate='monotone',
fillOpacity=0.9,
stroke='lightgray',
strokeWidth=0.5
).encode(
alt.X(f'{x}:T', title=''),
alt.Y(
f'{y}:Q',
scale=alt.Scale(range=[step, -step * overlap]),
axis=None
),
alt.Fill(
f'{row}:N',
title='Sector',
#legend=None,
scale=alt.Scale(scheme='accent')
),
tooltip=[alt.Tooltip(f'{x}:T'), alt.Tooltip(f'{row}:N'), alt.Tooltip(f'{y}:Q', format=',.02f')]
).facet(
row=alt.Row(
f'{row}:N',
title=None,
header=alt.Header(labelColor='white') #, labelAnchor='end')
)
).properties(
title=title,
bounds='flush'
).configure_facet(
spacing=0
).configure_view(
stroke=None,
width=750
).configure_title(
anchor='middle'
)
df_elec_yoy = df_e.copy().reset_index()
sectors = [c for c in df_e.columns.to_list() if "Sales" in c[1] and c[0] not in ['Total', 'Transportation', 'Other']]
df_elec_yoy['dt'] = pd.to_datetime(df_elec_yoy['Year'].apply(str) + '-' + df_elec_yoy['Month'].apply(str), format="%Y-%m")
df_elec_yoy[sectors] = df_elec_yoy[sectors].applymap(lambda v: pd.to_numeric(v, errors='coerce'))
df_elec_yoy = df_elec_yoy.set_index('dt')[sectors]\
.groupby(["dt"])\
.agg(sum)\
.pct_change(12)\
.apply(lambda v: v * 100)\
.reset_index()
"""
c = alt.Chart(df_elec_yoy.melt(id_vars='dt').dropna()).mark_bar(width=1.5).encode(
alt.X('dt:T', axis=alt.Axis(title='')),
alt.Y('value:Q', axis=alt.Axis(title='Year-over-year Growth [%]')),
alt.Row('variable_0:N', title='Sector'),
alt.Color('variable_0:N', title='Sector'),
tooltip=[alt.Tooltip('dt:T', format='%b %Y'), alt.Tooltip('value:Q', format=",.02")]
).properties(
title='US Growth of Electricity Consumption by Sector',
width=700,
background='white'
)
""";
c = doRidgeLineFor(df_elec_yoy.melt(id_vars='dt')[['dt', 'variable_0', 'value']].dropna(), 'dt', 'value', 'variable_0')
c.save('electricity.png')
c.display()
cmp = df_megawatts.resample("1Y").last().join(df_us_gdp)
cmp['year'] = cmp.index.map(lambda d: d.year)
cmp['decade'] = cmp.index.map(lambda v: "19{}0's".format(str(v)[2]) if v.year < 2000 else "20{}0's".format(str(v)[2]))
tegdp = alt.Chart(cmp.reset_index()).mark_point().encode(
alt.Y('Megawatt-hours', axis=alt.Axis(grid=False, title='Consumption [Megawatt-hours]'), scale=alt.Scale(zero=False, padding=35)),
alt.X('GDP', axis=alt.Axis(grid=False, title="GDP [USD]"), scale=alt.Scale(zero=False, padding=35)),
alt.Color('decade:N', title='Decade'),
).properties(
title="Total Electricity Consumption vs U.S. Real Gross Domestic Product",
width=1000,
height=500
)
text = tegdp.mark_text(
align='left',
baseline='middle',
dx=7,
size=10
).encode(
text='year:N'
)
tegdp + text