US Macroeconomic Data Explorer | Oil and Gas Sector

Oil and Gas Sector


A look at oil and natural gas, according to weekly supply reports from the Energy Information Agency.

In [1]:
import pandas as pd
import altair as alt
import numpy as np
import re

from time import sleep
from joblib import Memory

url = 'https://www.eia.gov/petroleum/supply/weekly/archive/'

res = !curl -L -s $url

html = "".join(res)

# https://www.eia.gov/petroleum/supply/weekly/archive/2020/2020_02_20/wpsr_2020_02_20.php

links = re.findall('href="([^"]+\d.php)"', html)

dates = [l.split("/")[-2].replace("_", "-") for l in links]
In [2]:
%%time

baseUrl = "https://www.eia.gov/petroleum/supply/weekly/archive/{y}/{y}_{m:02d}_{d:02d}/csv/table1.csv"

memory = Memory('data/', verbose=0)

@memory.cache
def fetchEiaReport(year="2019", month="7", day="24"):
    #print(baseUrl.format(y=year, m=int(month), d=int(day)))
    try:
        df = pd.read_csv(baseUrl.format(y=year, m=int(month), d=int(day)), usecols=[c for c in range(8)])
    except UnicodeDecodeError:
        df = pd.read_csv(baseUrl.format(y=year, m=int(month), d=int(day)), usecols=[c for c in range(8)], encoding="ISO-8859-1")
    except ValueError:
        print("Failed")
        return None
    
    df = df.iloc[:19, :2].set_index('STUB_1')
    
    return df.T
    
#fetchReport(year="2020", month="2", day="20").head()

dfs = []
for d in dates:
    dfs.append(fetchEiaReport(*(d.split("-"))))
    sleep(0.2)

df_oil = pd.concat(dfs)

df_oil['dt'] = df_oil.index.map(lambda v: pd.to_datetime(v, format='%m/%d/%y'))

df_oil_ts = df_oil.set_index('dt') #.agg('mean').resample('1W').nearest()

df_oil_ts = df_oil_ts.applymap(lambda v: str(v).replace(",", "")).applymap(pd.to_numeric)
df_oil_ts = df_oil_ts.groupby('dt').agg('mean').resample('1W').nearest()

df_oil_yoy = df_oil_ts.pct_change(52).apply(lambda v: v*100)
CPU times: user 36.3 s, sys: 1.43 s, total: 37.8 s
Wall time: 4min 25s
In [3]:
col = 'Crude Oil'

def chartFor(df, metric='Crude Oil', label='stock'):
    return alt.Chart(df.reset_index()[['dt', metric]]).mark_bar(width=1).encode(
        alt.X('dt:T', axis=alt.Axis(title='')),
        alt.Y(f'{metric}:Q', axis=alt.Axis(title='Year-over-year Change [%]')),
        color=alt.condition(f"datum['{metric}'] < 0",
            alt.value('darkred'),
            alt.value('royalblue')
        ),
        tooltip=[alt.Tooltip('dt:T', title=''), alt.Tooltip(f'{metric}:Q', title='YoY % Change', format=',.02f')]
    ).properties(
        title=f'Year over Year change in {metric} {label}',
        width=750,
        height=400
    )

chartFor(df_oil_yoy, col)
Out[3]:
In [4]:
col = 'Total Motor Gasoline'

chartFor(df_oil_yoy, col)
Out[4]:
In [5]:
col = 'Fuel Ethanol'

chartFor(df_oil_yoy, col)
Out[5]:
In [6]:
def fetchPriceData(url):
    commandString = "curl -s '{0}' | grep 'originalData ='".format(url)
    res = !$commandString
    
    dataString = res[0].split('= ')[-1]
    
    dates = [pd.to_datetime(d) for d in re.findall('"date":"(\d+-\d+-\d+)"', dataString)]
    closes = [float(v) for v in re.findall('"close":"(\d+.\d+)"', dataString)]
    closes1 = [float(v) for v in re.findall('"close1":"(\d+.\d+)"', dataString)]
    
    recs = zip(dates, closes, closes1) if closes1 else zip(dates, closes)
    cols = ['Date', 'Close-Adj', 'Close-Nominal'] if closes1 else ['Date', 'Close-Nominal']
    
    return pd.DataFrame.from_records(recs, columns=cols)

df_oil = fetchPriceData('https://www.macrotrends.net/assets/php/chart_iframe_comp.php?id=1369&url=crude-oil-price-history-chart:1656')

How does the crude oil price vary with inventories?

In [7]:
oil_price = alt.Chart(df_oil[-86:]).mark_line(color='black', strokeDash=[4, 2]).encode(
    alt.X('Date:T'),
    alt.Y('Close-Nominal:Q', axis=alt.Axis(title='Oil Price (black) [USD/barrel]'))
).properties(
    width=750,
    height=400
)

c = (chartFor(df_oil_yoy, 'Crude Oil') + oil_price).resolve_scale(y='independent').properties(
    background='white'
)

c.save('oil-gas-inventories.png')
c.display()

Natural Gas

In [8]:
url = 'https://www.eia.gov/dnav/ng/xls/NG_STOR_WKLY_S1_W.xls'

df_ng = pd.read_excel(url, sheet_name='Data 1', skiprows=2)

df_ng.columns = ['Date', 'Lower48', 'East', 'Midwest', 'Mountain', 'Pacific', 'Central', 'SaltSouthCental', 'NonsaltSouthCental']

#df_ng.head()

df_ng['dt'] = df_ng.Date.map(pd.to_datetime)

#df_ng.head()

df_ng_yoy = df_ng.set_index('dt').iloc[:, 1:].pct_change(52).apply(lambda v: v * 100)

#df_ng_yoy.tail()
In [9]:
chartFor(df_ng_yoy, 'Lower48', label='Natural Gas storage')
Out[9]:
In [10]:
df_natgas = fetchPriceData('https://www.macrotrends.net/assets/php/chart_iframe_comp.php?id=2478&url=natural-gas-prices-historical-chart')

#df_natgas.head()

How does the natural gas price vary with inventories?

In [11]:
ng_price = alt.Chart(df_natgas[-110:]).mark_line(color='black', strokeDash=[4, 2]).encode(
    alt.X('Date:T'),
    alt.Y('Close-Nominal:Q', axis=alt.Axis(title='Natural Gas Price (black) [USD/MMBtu]'))
).properties(
    width=750,
    height=400
)

(chartFor(df_ng_yoy, 'Lower48', label='Natural Gas storage') + ng_price).resolve_scale(y='independent').properties(
    background='white'
)
Out[11]:

© kdunn926