import pandas as pd
import altair as alt
import numpy as np
import re
from json import loads, JSONDecodeError
from io import BytesIO
from os import environ
from datetime import date
from IPython.core.display import HTML
from time import sleep
from joblib import Memory
from urllib.request import urlopen, Request
from urllib.error import HTTPError, URLError
from tabula import read_pdf
from time import sleep
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
import selenium.webdriver.support.expected_conditions as EC
from selenium.webdriver.common.by import By
def ajaxComplete(driver):
    """WebDriverWait predicate: True once jQuery reports no pending AJAX calls.

    Swallows WebDriverException (e.g. a page without jQuery loaded yet) and
    returns None so the surrounding wait keeps polling.
    """
    try:
        active = driver.execute_script("return jQuery.active")
    except WebDriverException:
        return None
    return active == 0
# Headless Chrome configuration shared by the Selenium scrapers below.
chrome_options = Options()
chrome_options.add_argument("--disable-extensions")
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--headless")
#chrome_options.add_argument("download.default_directory=/tmp")
# The download directory must be set via the "prefs" experimental option;
# the commented argument form above does not work.
chrome_options.add_experimental_option("prefs", { "download.default_directory" : "/tmp" })
# This notebook depends on interactive browsing / manual URL discovery, so
# refuse to run inside a GitHub Actions workflow.
if environ.get("GITHUB_WORKFLOW"):
    raise RuntimeError("Requires manual updates")
def doChart(df, location='Oakland', metric='Full-Total'):
    """Simple monthly-volume line chart for a single port metric.

    Expects `df` to carry a 'dt' column (or index level) plus the `metric`
    column; rows with missing values are dropped before plotting.
    """
    source = df.reset_index()[['dt', metric]].dropna()
    x_axis = alt.X('dt:T', axis=alt.Axis(title=''))
    y_axis = alt.Y(f'{metric}:Q', axis=alt.Axis(title='Volume [TEUs]'))
    hover = [
        alt.Tooltip("dt:T", format="%b %Y"),
        alt.Tooltip(f"{metric}:Q", format=",.02f"),
    ]
    chart = alt.Chart(source).mark_line(color='purple').encode(x_axis, y_axis, tooltip=hover)
    return chart.properties(
        title=f'Port of {location} {metric} Container Volume',
        width=750,
        height=400,
    )
def doYoyChart(df, location='Oakland', metric='Full-Total', width=2, format=None):
    """Two-panel interactive year-over-year chart for one port metric.

    Upper panel: monthly YoY percentage change as bars (negative months in
    salmon, positive in blue); its x-domain follows the brush selection.
    Lower panel: the raw monthly volume line with an interval brush for zoom.

    Parameters
    ----------
    df : DataFrame with a 'dt' column (or index level) and a `metric` column.
    location : port name used in the chart title.
    metric : column of `df` to plot.
    width : YoY bar width in pixels.
    format : optional d3 time-format string for the x-axis labels.
    """
    brush = alt.selection(type='interval', encodings=['x'])
    kwargs = {} if format is None else { "format": format }
    base = alt.Chart(df.reset_index()[['dt', metric]].dropna()).mark_line(color='purple').encode(
        alt.X('dt:T', axis=alt.Axis(title='', **kwargs), impute=alt.ImputeParams(value=None)),
        alt.Y(f'{metric}:Q', axis=alt.Axis(title='Volume [TEUs]')),
        # BUGFIX: the tooltip previously referenced a nonexistent 'nolarge'
        # field (the chart data only has 'dt' and the metric), so hover values
        # never resolved; point it at the plotted metric instead.
        tooltip=[alt.Tooltip("dt:T", format="%b %Y"), alt.Tooltip(f"{metric}:Q", format=",.02f")]
    ).properties(
        width=750,
        height=400
    )
    # YoY series: lag the metric by 12 rows (assumes one row per month) and
    # compute the percent change against that lagged value.
    upper = base.mark_bar(width=width, color='blue').transform_window(
        window=[alt.WindowFieldDef("lag", metric, param=12, **{ 'as': 'previous' })],
        sort=[{"field": "dt"}],
    ).transform_calculate(
        yoy=f"((datum['{metric}'] - datum.previous) / datum.previous) * 100"
    ).encode(
        alt.X('dt:T', axis=alt.Axis(title='', **kwargs), scale=alt.Scale(domain=brush), impute=alt.ImputeParams(value=None)),
        alt.Y(f'yoy:Q', axis=alt.Axis(title='Year-over-Year Volume Growth [%]'), impute=alt.ImputeParams(method='value', keyvals=[400, 1000])),
        color=alt.condition(f"datum['yoy'] < 0",
            alt.value('lightsalmon'),
            alt.value('royalblue')
        )
    ).properties(
        title=f'Port of {location} {metric} Container Volume',
    )
    lower = base.properties(
        height=100
    ).add_selection(brush)
    return (upper & lower).properties(background='white')
# Browser-like User-Agent so the port websites don't reject our requests.
uaString = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
# joblib disk cache under data/ so immutable historical downloads happen once.
memory = Memory('data/', verbose=0)
# "data current as of" banner (rendered when this runs as a notebook cell).
HTML("<h4><i>(data current as of {})</i></h4>".format(date.today().strftime("%B %d, %Y")))
# Port of Los Angeles
# https://www.portoflosangeles.org/business/statistics/container-statistics
# 1995
# https://www.portoflosangeles.org/business/statistics/container-statistics/historical-teu-statistics-2019
@memory.cache
def fetchLaData(u, year, cols, args=None):
    """Download one year of Port of LA TEU statistics and parse the HTML table.

    Results are cached on disk via joblib (historical years never change).

    Parameters
    ----------
    u : per-year statistics page URL.
    year : calendar year, appended to month names to build the 'dt' column.
    cols : column labels to apply after the leading 'Month' column.
    args : extra keyword arguments for pandas.read_html
           (defaults to {'skiprows': 1}).
    """
    # Avoid the mutable-default-argument pitfall; None stands in for the old
    # default of {'skiprows': 1}.
    if args is None:
        args = { 'skiprows': 1 }
    req = Request(u, data=None, headers={ 'User-Agent': uaString })
    data = urlopen(req)
    tmp = pd.read_html(BytesIO(data.read()), **args)[0]
    try:
        tmp.columns = ['Month'] + cols
    except ValueError as e:
        # Column-count mismatch: keep the raw columns and surface the problem.
        #print(tmp.head())
        print(e)
    # Unparseable month labels (e.g. a totals row) become NaT and are dropped later.
    tmp['dt'] = tmp['Month'].apply(lambda v: pd.to_datetime("{}-{}".format(v, year), format="%B-%Y", errors='coerce'))
    return tmp
def fetchLaDataNoCache(u, year, cols, args=None):
    """Uncached variant of fetchLaData, used for the in-progress year whose
    table changes monthly.

    Same parameters and behavior as fetchLaData, minus the joblib cache.
    """
    # Avoid the mutable-default-argument pitfall; None stands in for the old
    # default of {'skiprows': 1}.
    if args is None:
        args = { 'skiprows': 1 }
    req = Request(u, data=None, headers={ 'User-Agent': uaString })
    data = urlopen(req)
    tmp = pd.read_html(BytesIO(data.read()), **args)[0]
    try:
        tmp.columns = ['Month'] + cols
    except ValueError as e:
        # Column-count mismatch: keep the raw columns and surface the problem.
        #print(tmp.head())
        print(e)
    # Unparseable month labels (e.g. a totals row) become NaT and are dropped later.
    tmp['dt'] = tmp['Month'].apply(lambda v: pd.to_datetime("{}-{}".format(v, year), format="%B-%Y", errors='coerce'))
    return tmp
# The 1995 table lacks the 'Total Exports' column that later years carry.
colsPre96 = ['Loaded Imports', 'Empty Imports', 'Total Imports',
             'Loaded Export', 'Empty Exports', 'Total TEUs', 'Prior Year Change']
# Column layout used from 1996 onward.
cols = ['Loaded Imports', 'Empty Imports', 'Total Imports',
        'Loaded Export', 'Empty Exports', 'Total Exports', 'Total TEUs',
        'Prior Year Change']
args = {
    'skiprows': 1
}
# Per-year statistics page on the Port of Los Angeles site.
urlFor = lambda y: "https://www.portoflosangeles.org/business/statistics/container-statistics/historical-teu-statistics-{}".format(y)
currentYear = date.today().year
dfs_la = []
for y in range(1995, currentYear + 1):
    url = urlFor(y)
    #print(url)
    if y < 1996:
        dfs_la.append(fetchLaData(url, y, colsPre96, args))
    elif y == currentYear:
        # The in-progress year changes every month, so bypass the disk cache.
        dfs_la.append(fetchLaDataNoCache(url, y, cols, {}))
    else:
        dfs_la.append(fetchLaData(url, y, cols, {}))
df_la = pd.concat(dfs_la).dropna() #.tail()
# NOTE(review): this only coerces the first column; the loop below re-coerces
# every numeric column anyway, so this looks redundant — verify before removing.
df_la[cols[:1]] = df_la[cols[:1]].apply(pd.to_numeric)
def extractValue(v):
    """Parse a numeric string that may use '.' as a thousands separator.

    Strings containing more than one '.' (e.g. '1.234.56') treat the first
    '.' as a thousands separator: it is stripped, commas are removed, and the
    remainder is parsed as a float. Otherwise the string is parsed directly.

    Raises ValueError for strings float() cannot parse (same as before).
    """
    if v.count(".") > 1:
        # Raw string for the regex (the old "\." relied on a deprecated
        # non-raw escape); count=1 removes only the first separator dot.
        return float(re.sub(r"\.", "", v, count=1).replace(",", ""))
    return float(v)
# Coerce every data column to float; fall back to the thousands-separator-aware
# parser for columns float() chokes on.
for c in df_la.columns:
    if c in ['Month', 'dt', 'Prior Year Change']:
        continue
    try:
        df_la[c] = df_la[c].apply(float)
    except ValueError:
        df_la[c] = df_la[c].apply(extractValue)
#df_la.head()
#df_la_smoothed = df_la.set_index('dt').resample('2M').mean().reset_index()
# Save the headline chart to disk for embedding elsewhere; the rest display inline.
c = doYoyChart(df_la, location='Los Angeles', metric='Loaded Imports')
c.save('transportation-ocean.png')
c.display()
doYoyChart(df_la, location='Los Angeles', metric='Loaded Export')
doYoyChart(df_la, location='Los Angeles', metric='Total Imports')
doYoyChart(df_la, location='Los Angeles', metric='Total TEUs')
# Port of Long Beach
# http://www.polb.com/economics/stats/teus_archive.asp
# NOTE(review): rebinds the module-wide uaString to a newer browser string —
# presumably the POLB site rejects the older one; confirm before consolidating.
uaString = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
req = Request("https://thehelm.polb.com/stellar_custom_table/table100/", data=None, headers={ 'User-Agent': uaString })
data = urlopen(req)
# displayed_only=False also parses rows hidden via CSS; iloc[:-1] drops the
# trailing row of the table.
df_lb_raw = pd.read_html(BytesIO(data.read()), displayed_only=False)[0].iloc[:-1]
# Drop the leading column.
df_lb_raw = df_lb_raw.iloc[:, 1:]
df_lb_raw[df_lb_raw.columns[1:]] = df_lb_raw[df_lb_raw.columns[1:]].apply(pd.to_numeric)
df_lb_raw['dt'] = df_lb_raw['Date'].map(lambda v: pd.to_datetime(v, format='%b %Y'))
#df_lb_raw.head(16)
doYoyChart(df_lb_raw, location='Long Beach', metric='Total Loaded')
doYoyChart(df_lb_raw, location='Long Beach', metric='Total')
doYoyChart(df_lb_raw, location='Long Beach', metric='Total Empties')
doYoyChart(df_lb_raw, location='Long Beach', metric='Loaded Inbound')
# Port of NY/NJ embeds its stats tables inside a sitewide content-model JSON.
dataUrl = "https://www.panynj.gov/content/port/en.model.json"
#jqPath = '\'.":children"."/port/en/our-port/facts-and-figures".":items"."root".":items"."responsivegrid".":items"."accordionlist_435572874".":items"\''
def doNynjFetch(retries = 5):
    """Fetch the PANYNJ content JSON and drill down to the accordion items
    that hold the per-year TEU tables.

    Retries on malformed JSON responses (sleeping 1s between attempts);
    once `retries` is exhausted an empty dict is returned. Other errors
    (network failures, missing keys) propagate to the caller.
    """
    keyPath = [
        ":children",
        "/port/en/our-port/facts-and-figures",
        ":items",
        "root",
        ":items",
        "responsivegrid",
        ":items",
        "accordionlist_435572874",
        ":items",
    ]
    while retries >= 0:
        try:
            req = Request(dataUrl, data=None, headers={ 'User-Agent': uaString })
            node = loads(urlopen(req).read())
            for key in keyPath:
                node = node[key]
            return node
        except JSONDecodeError:
            sleep(1)
            retries -= 1
    return loads("{}")
nynj_dict_content = doNynjFetch()
# Column layout used by the post-2011 tables (the try branch below).
post2011Cols = ['Import Loads', 'Import Empties', 'Export Loads', 'Export Empties', 'Total Loads', 'Total Empties', 'Total TEUs', 'Total Rail Lifts']
# Column layout for the older, simpler tables (the except branch below).
cols = ["Import TEUs", "Export TEUs", "Total TEUs", "Total Rail Lifts"]
currentYear = date.today().year
dfs_nynj = []
for v in nynj_dict_content.values():
    # Each accordion item is one year; the year lives in its aria label.
    year = v["linkAriaLabel"]
    #print(year)
    try:
        tmp = pd.read_html(v[":items"]["text"]["text"])
    except KeyError:
        # Some years store the HTML under 'text_copy' instead of 'text'.
        tmp = pd.read_html(v[":items"]["text_copy"]["text"])
    for n, monthData in enumerate(tmp):
        # skip the YTD section
        #if n < 2:
        #    continue
        mtmp = monthData.copy()
        try:
            mtmp.columns = ['Period'] + post2011Cols
            # cleanup types before doing math
            mtmp[post2011Cols] = mtmp[post2011Cols].applymap(lambda v: re.sub("[^0-9.]", "", str(v)))
            mtmp[post2011Cols] = mtmp[post2011Cols].apply(lambda v: pd.to_numeric(v, errors='coerce'))
            mtmp['Import TEUs'] = mtmp['Import Loads'] + mtmp['Import Empties']
            mtmp['Export TEUs'] = mtmp['Export Loads'] + mtmp['Export Empties']
            # Months are listed Dec->Jan after 2014, Jan->Dec before 2014 (and
            # for the current year), so the table position maps to a month number.
            index = 13-n if int(year) > 2014 and int(year) != currentYear else n
            mtmp['dt'] = mtmp.iloc[:, 0].map(lambda v: pd.to_datetime("{}-{}".format(index, v), format="%m-%Y", errors='coerce'))
        except ValueError:
            # Narrower, older table layout: the period cell holds "Month YY".
            mtmp.columns = ['Period'] + cols
            mtmp['dt'] = mtmp.iloc[:, 0].map(lambda v: pd.to_datetime(v, format="%B %y", errors='coerce'))
            mtmp[cols] = mtmp[cols].applymap(lambda v: re.sub("[^0-9.]", "", str(v)))
            mtmp[cols] = mtmp[cols].apply(lambda v: pd.to_numeric(v, errors='coerce'))
        dfs_nynj.append(mtmp)
# Keep only the fields both layouts share; average duplicate month rows.
commonFields = ['Period', 'dt', 'Import TEUs', 'Export TEUs', 'Total TEUs']
df_nynj = pd.concat(dfs_nynj)[commonFields].dropna() #.tail(30)
df_nynj = df_nynj.groupby('dt').agg('mean') #.head()
doYoyChart(df_nynj.reset_index(), location="New York/New Jersey", metric="Import TEUs")
doYoyChart(df_nynj.reset_index(), location="New York/New Jersey", metric="Export TEUs")
doYoyChart(df_nynj[12:].reset_index(), location="New York/New Jersey", metric="Total TEUs")
@memory.cache
def doGaFetch(url, retries=5):
    """Fetch the Georgia Ports monthly-TEU PDF and extract its tables.

    Returns the list of DataFrames produced by tabula, or None once all
    retries are exhausted. Results are cached on disk via joblib.
    """
    if retries < 0:
        return None
    try:
        req = Request(url, data=None, headers={ 'User-Agent': uaString })
        data = urlopen(req)
        tabs = read_pdf(BytesIO(data.read()), pages='all')
        # Probe the first table; an empty result means the PDF parse failed.
        tabs[0]
        return tabs
    except (KeyError, IndexError):
        # KeyError can surface from tabula's parsing; IndexError comes from
        # the probe above when no tables were extracted (previously uncaught,
        # which skipped the retry entirely). Back off and retry.
        sleep(1)
        return doGaFetch(url, retries-1)
# Georgia Ports Authority: scrape the current monthly-TEU PDF link off the
# stats page (IPython shell magic).
reportPage = 'https://gaports.com/sales/by-the-numbers/'
currentReport = !curl -s -L $reportPage | egrep -i 'Monthly.*TEU.*pdf' | perl -pe 's/.*href=\"([^"]+)\".*/\1/'
print(currentReport[0])
#current = "https://gaports.com/wp-content/uploads/2021/04/Monthly-TEU-Throughput-March-2021.pdf"
tabs = doGaFetch(currentReport[0])
# One row per year: twelve month columns plus an annual total.
cols = ['Year', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'Total']
def flatten(l):
    """Concatenate one level of nesting: [[a, b], [c]] -> [a, b, c]."""
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat
def parseGpTable(df, columns, start=0, end=17):
    """Parse a tabula-extracted Georgia Ports year-by-month table into a
    monthly time series.

    tabula merges several month cells into single space-separated strings;
    rows are split back apart, melted to long form, and resampled to one
    value per month-end.

    Parameters
    ----------
    df : raw DataFrame from tabula's read_pdf.
    columns : labels for the reshaped table ('Year', months, 'Total').
    start / end : row slice of `df` holding the summary table; end=None uses
                  rows start:17 with all columns.

    Returns a DataFrame with 'dt' and 'TEU Total' columns.
    """
    gp_raw = df.iloc[start:end, 1:].copy() if end is not None else df.iloc[start:17, :].copy()
    #print(gp_raw.iloc[0, :].values)
    firstRowVals = flatten(map(lambda v: str(v).split(), gp_raw.iloc[0, :-1].dropna().values))
    #print(firstRowVals)
    # Use all the month values in order then append the total value as the
    # last element.  BUGFIX: pad against the `columns` parameter, not the
    # module-level `cols` global the original referenced.
    firstRow = [firstRowVals[n] if n < len(firstRowVals) else '' for n in range(len(columns[:-1]))] + [gp_raw.iloc[0, -1]]
    #print(firstRow)
    records = gp_raw.iloc[:, 1:].apply(lambda s: flatten(map(lambda v: str(v).split(), s.values)), axis=1)
    #print(records.head(1))
    df_ga_melted = pd.DataFrame.from_records([firstRow] + [r for r in records.values], columns=columns).melt(id_vars='Year')
    # Drop the annual-total column before building the monthly index.
    df_ga_melted = df_ga_melted[df_ga_melted['variable'] != 'Total']
    df_ga_melted['dt'] = pd.to_datetime(df_ga_melted['Year'] + '-' + df_ga_melted['variable'], format='%Y-%b')
    df_ga_melted['value'] = df_ga_melted['value'].apply(lambda v: pd.to_numeric(str(v).replace(",", ""), errors='coerce'))
    df_ga_tst = df_ga_melted[['value', 'dt']].set_index('dt').resample('1M').mean().rename({'value': 'TEU Total'}, axis=1)
    return df_ga_tst.reset_index()
df_ga_tst = parseGpTable(tabs[0], columns=cols, start=3, end=None)
# Detail rows carry a 'Key' column (Import/Export x Full/Empty/Total).
cols2 = ['Year', 'Key', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'Total']
#gp_raw = tabs[0].iloc[3:21].copy()
# Detail section starts after row 18 of the first table and continues across
# the remaining PDF pages.
gp_detail_raw = pd.concat([tabs[0].iloc[18:].copy()] + tabs[1:]).copy()
gp_detail_raw = gp_detail_raw[~gp_detail_raw.iloc[:, 1].isna()]
#print(gp_detail_raw)
# Exclude rows with erroneous data
gp_detail_raw = gp_detail_raw[~gp_detail_raw.iloc[:, 2].str.contains("%")]
# Keep only rows that look like detail keys (Import/Export/Total) or years.
gp_detail_raw = gp_detail_raw[gp_detail_raw.iloc[:, 1].str.match("^(I|E|T|\d)")]
#print(gp_detail_raw.head(25))
# Re-join key names the PDF splits on spaces, e.g. 'Import Full' -> 'Import-Full'.
hypenate = lambda v: str(v).replace("rt ", "rt-")\
    .replace("Total Full", "Total-Full")\
    .replace("Total Empty", "Total-Empty")\
    .replace("Total All", "Total-All")
gp_detail_raw.iloc[:, 1] = gp_detail_raw.iloc[:, 1].apply(hypenate)
# Disabled alternative parsing path, deliberately kept as a string for quick
# re-enabling (see the toggle note inside).
"""
# Special case for the current year
firstRowVals2 = flatten(map(lambda v: hypenate(v).split(), gp_detail_raw.iloc[0, :-1].values))
firstRow2 = [firstRowVals2[n] if n < len(firstRowVals2) else '' for n in range(len(cols2[:-1]))] + [gp_detail_raw.iloc[0, -1]]
# General case for the historical years
remainingRows = gp_detail_raw.iloc[1:].apply(lambda s: flatten(map(lambda v: hypenate(v).split(), s.to_list())), axis=1)
gp_detail = pd.DataFrame.from_records([firstRow2] + [r for r in remainingRows.values], columns=cols2)
#print(gp_detail.head())
# Find all the records that don't have the year in them
rowsToShift = gp_detail[~gp_detail['Year'].str.match("^\d")].index.values
#####
# Note, this is the likely line to toggle between commented<->uncommented if seeing issues
#####
# Shift records not containing the year to the right by one to line things up
gp_detail.iloc[rowsToShift] = gp_detail.iloc[rowsToShift].apply(lambda v: pd.Series([None] + v.to_list()[:-1]), axis=1)
gp_detail.iloc[rowsToShift, 0] = pd.to_numeric('', errors='coerce')
""";
gp_detail = gp_detail_raw.copy()
gp_detail.columns = cols2
# Fill the years for the rest of the rows
gp_detail['Year'] = gp_detail['Year'].ffill(limit=8)
#print(gp_detail.head(20))
df_ga_detail_melted = gp_detail.melt(id_vars=["Key", "Year"])
df_ga_detail_melted = df_ga_detail_melted[df_ga_detail_melted["variable"] != "Total"]
# Years look like '2020.0' after the ffill (presumably floats) — strip the
# fractional part before parsing the date.
df_ga_detail_melted['dt'] = pd.to_datetime(df_ga_detail_melted['Year'].apply(str).apply(lambda v: v.split(".")[0]) +\
    '-' + df_ga_detail_melted['variable'], format="%Y-%b", errors='coerce')
df_ga_detail_melted['value'] = df_ga_detail_melted['value']\
    .apply(lambda v: str(v).replace(",", ""))\
    .apply(lambda v: pd.to_numeric(v, errors='coerce'))
#print(df_ga_detail_melted.fillna('nan').dropna().tail())
# Wide table: one column per detail key, indexed by month.
df_ga_ts = df_ga_detail_melted[['dt', 'Key', 'value']].dropna().pivot(index='dt', columns='Key').reset_index()
df_ga_ts.columns = ['dt', 'Export Empty', 'Export Full', 'Export Total',\
    'Import Empty', 'Import Full', 'Import Total', 'Total All', 'Total Empty', 'Total Full']
df_ga_ts = df_ga_ts.dropna()
doYoyChart(df_ga_ts, location='Georgia', metric='Total All')
doYoyChart(df_ga_ts, location='Georgia', metric='Import Empty')
doYoyChart(df_ga_ts, location='Georgia', metric='Import Full')
doYoyChart(df_ga_ts, location='Georgia', metric='Export Total')
doYoyChart(df_ga_ts, location='Georgia', metric='Export Empty')
doYoyChart(df_ga_ts, location='Georgia', metric='Export Full')
# NW Seaport Alliance (Seattle/Tacoma): find the stats spreadsheet link.
result = !curl -s 'https://www.nwseaportalliance.com/about-us/cargo-statistics' | grep 'xlsx'
#pageCount = re.findall("of ([0-9]+)", "".join(rawPg))[0]
currentReport = [u for u in re.findall('href="([^"]+)"', "".join(result)) if "xls" in u][0]
#print(currentReport)
df_nwsea = pd.read_excel(currentReport, skiprows=2)
df_nwsea.columns = ["Month", "Measure"] + df_nwsea.columns.tolist()[2:]
# Month labels span several measure rows; forward-fill, then drop the
# percentage/total summary rows.
df_nwsea['Month'] = df_nwsea['Month'].ffill()
df_nwsea = df_nwsea[~pd.isna(df_nwsea['Measure'])]
df_nwsea = df_nwsea[~df_nwsea['Month'].str.match("%|Total")]
df_nwsea_melt = df_nwsea.melt(id_vars=['Month', 'Measure'])
# Year lives in the spreadsheet column header ('variable' after melting);
# stamp each observation at month-end.
df_nwsea_melt['dt'] = pd.to_datetime(df_nwsea_melt['Month'] + "-" + df_nwsea_melt['variable'].apply(str), format='%b-%Y')\
    + pd.offsets.MonthEnd(0)
#df_nwsea_melt.tail(25)
df_nwsea_tab = df_nwsea_melt[['dt', 'Measure', 'value']].pivot_table(index='dt', columns='Measure', values='value')
# Zero totals mark months with no data yet.
df_nwsea_tab = df_nwsea_tab[df_nwsea_tab['Total TEUs'] != 0]
#df_nwsea_tab.head()
df_nwsea_tab
doYoyChart(df_nwsea_tab, metric="International Exports Full", location="Seattle/Tacoma", width=7, format='%b-%y')
doYoyChart(df_nwsea_tab, metric="International Exports Empty", location="Seattle/Tacoma", width=7, format='%b-%y')
@memory.cache
def fetchHouston(url, retries=5):
    """Download the Port Houston TEU-statistics PDF and parse it into one
    DataFrame with a parsed 'dt' column.

    Retries up to `retries` times (1s apart) when the PDF tables fail to
    parse; returns an empty DataFrame once retries are exhausted. Results
    are cached on disk via joblib.
    """
    if retries < 0:
        return pd.DataFrame()
    try:
        req = Request(url, data=None, headers={ 'User-Agent': uaString })
        data = urlopen(req)
        cols = ["Date", "Loaded Imports", "Loaded Exports", "Loaded Total",
                "Empty Imports", "Empty Exports", "Empty Total", "Loaded and Empty Total"]
        dfs_hs = read_pdf(BytesIO(data.read()), pages='all', pandas_options={ 'names': cols }) #.head(20)
        # The first table carries 5 header rows; later tables are data-only.
        df_hs = pd.concat([dfs_hs[0].iloc[5:]] + dfs_hs[1:])
        # Strip thousands separators and coerce all data columns to numeric.
        df_hs[df_hs.columns.to_list()[1:]] = df_hs[df_hs.columns.to_list()[1:]]\
            .applymap(lambda v: pd.to_numeric(v.replace(",", "") if isinstance(v, str) else v, errors='coerce'))
        df_hs['dt'] = df_hs['Date'].map(lambda v: pd.to_datetime(str(v), format='%b-%y', errors='coerce'))
        return df_hs
    except (KeyError, IndexError):
        # KeyError can surface from tabula's parsing; IndexError from
        # dfs_hs[0] when no tables were extracted (previously uncaught,
        # which skipped the retry entirely). Back off and retry.
        sleep(1)
        return fetchHouston(url, retries-1)
# Port Houston: scrape the current TEU-stats PDF link off the stats page.
res = !curl -s 'https://porthouston.com/about-us/statistics/' | grep 'TEU-stats'
currentUrl = re.findall('href="([^"]+)"', "".join(res))[0]
#print(currentUrl)
# TODO - automate detection of this url from here: https://porthouston.com/about-us/statistics/
df_hs = fetchHouston(currentUrl)
#df_hs.tail()
doYoyChart(df_hs, metric="Empty Imports", location="Houston")
doYoyChart(df_hs, metric="Empty Exports", location="Houston")
doYoyChart(df_hs, metric="Loaded Total", location="Houston")
def fetchCharleston(url, retries=5):
    """Download the SC Ports TEU-history PDF and extract its tables.

    Returns the list of DataFrames tabula found, or None once all retries
    (1s apart) are exhausted.
    """
    if retries < 0:
        return None
    try:
        req = Request(url, data=None, headers={ 'User-Agent': uaString })
        data = urlopen(req, timeout=15)
        dfs_ch = read_pdf(BytesIO(data.read()), pages='all')
        # Probe the first table; an empty list means the PDF parse failed.
        dfs_ch[0]
        return dfs_ch
    except (KeyError, IndexError):
        # KeyError can surface from tabula's parsing; IndexError from the
        # probe above when no tables were extracted (previously uncaught,
        # which skipped the retry entirely).
        print("Response code: {}".format(data.getcode()))
        sleep(1)
        return fetchCharleston(url, retries-1)
    except URLError:
        # Covers HTTPError and timeouts; no response object here, so retry
        # without printing a status code.
        sleep(1)
        return fetchCharleston(url, retries-1)
# Port of Charleston publishes its entire TEU history as one PDF.
dfs_ch = fetchCharleston("http://scspa.com/wp-content/uploads/gl078-pc-teu-history.pdf")
def fixColumns(df_):
    """Normalize one tabula-extracted Charleston TEU table.

    tabula merges adjacent numeric columns into single space-separated
    strings; this splits them back apart, reorders everything into a fixed
    14-column layout, coerces the data columns to numeric, and adds a parsed
    'dt' timestamp column.

    Returns the cleaned DataFrame, or None when a page's layout doesn't
    produce the expected split pieces.
    """
    # Rows 0-1 are header rows in the extracted tables.
    firstDataRow = 2
    df = df_.copy()
    # Standardize column count
    if df.shape[1] == 10:
        # Standardize rail moves column location
        if df.iloc[:, 1].isna().all():
            df.iloc[:, 1] = df.iloc[:, 2]
        df = df.drop(df.columns[2], axis=1)
    res = df.iloc[firstDataRow:].copy()
    #print(df.head())
    #print(df.iloc[firstDataRow:, 2].values)
    # split rail column
    _ = df.iloc[firstDataRow:, 2].astype(str).str.split(" ", n = 1, expand = True)
    #print(_)
    res["RAIL-MOVES"]= _[0]
    try:
        res["RAIL-RATIO"]= _[1]
    except KeyError:
        # Split produced only one piece: layout mismatch, give up on this page.
        return None
    #res = pd.concat([df.iloc[firstDataRow:, 0:4], res.iloc[:, 2:]], axis=1)
    # split pier loaded column
    # NOTE: _[0] + _[1] re-joins the halves of a thousands-grouped number
    # that the split broke apart (e.g. '1,' + '234').
    _ = df.iloc[firstDataRow:, 3].str.split(" ", n = 2, expand = True)
    res["PIER-LOADED-EXPORT"]= _[0] + _[1]
    try:
        res["PIER-LOADED-IMPORT"]= _[2]
    except KeyError:
        return None
    #res = pd.concat([res, res.iloc[:, -2:]], axis=1)
    try:
        # split pier empty column
        _ = df.iloc[firstDataRow:, 4].str.split(" ", n = 2, expand = True)
        res["PIER-EMPTY-EXPORT"]= _[0] + _[1]
        res["PIER-EMPTY-IMPORT"]= _[2]
    except KeyError:
        # Two-piece variant: values were small enough not to be re-split.
        res["PIER-EMPTY-EXPORT"]= _[0]
        res["PIER-EMPTY-IMPORT"]= _[1]
    #res["PIER-TOTAL"]= _[2]
    #res = pd.concat([df.iloc[firstDataRow:, 0], res.iloc[:, 2:]], axis=1)
    # split loaded teu column
    _ = df.iloc[firstDataRow:, 6].str.split(" ", n = 2, expand = True)
    res["TEU-LOADED-EXPORT"]= _[0] + _[1]
    res["TEU-LOADED-IMPORT"]= _[2]
    #res = pd.concat([df.iloc[firstDataRow:, 0], df.iloc[firstDataRow:, 4:6], res.iloc[:, 8:10], df.iloc[firstDataRow:, -1]], axis=1)
    #print(res)
    try:
        # split empty teu column
        _ = df.iloc[firstDataRow:, 7].str.split(" ", n = 2, expand = True)
        res["TEU-EMPTY-EXPORT"]= _[0] + _[1]
        res["TEU-EMPTY-IMPORT"]= _[2]
    except KeyError:
        # Two-piece variant, as above.
        res["TEU-EMPTY-EXPORT"]= _[0]
        res["TEU-EMPTY-IMPORT"]= _[1]
    #res = pd.concat([df.iloc[firstDataRow:, 0], df.iloc[firstDataRow:, 4:6], res.iloc[:, 8:], df.iloc[firstDataRow:, -1]], axis=1)
    # Assemble the final column order: originals by position, splits by name.
    res = pd.concat([res.iloc[:, 0],
        res.iloc[:, 1],
        res["RAIL-MOVES"],
        res["RAIL-RATIO"],
        res["PIER-LOADED-EXPORT"],
        res["PIER-LOADED-IMPORT"],
        res["PIER-EMPTY-EXPORT"],
        res["PIER-EMPTY-IMPORT"],
        res.iloc[:, 5],
        res["TEU-LOADED-EXPORT"],
        res["TEU-LOADED-IMPORT"],
        res["TEU-EMPTY-EXPORT"],
        res["TEU-EMPTY-IMPORT"],
        res.iloc[:, 8],
        ], axis=1)
    res.columns = ['DATE',
        'RAIL-DRAYS', 'RAIL-MOVES', 'RAIL-RATIO',
        'PIER-LOADED-EXPORT', 'PIER-LOADED-IMPORT',
        'PIER-EMPTY-EXPORT', 'PIER-EMPTY-IMPORT',
        'PIER-TOTAL',
        'TEU-LOADED-EXPORT', 'TEU-LOADED-IMPORT',
        'TEU-EMPTY-EXPORT', 'TEU-EMPTY-IMPORT',
        'TEU-TOTAL']
    #print(res)
    # fix remaining column labels
    #res.columns = ['DATE', 'RAIL-DRAYS', 'RAIL-MOVES'] + res.columns.to_list()[3:-1] + ["TEU-TOTAL"]
    # convert all columns to numeric
    res.iloc[:, 1:3] = res.iloc[:, 1:3]\
        .applymap(lambda v: pd.to_numeric(v.replace(",", "").replace(" ", "") if isinstance(v, str) else v, errors='coerce'))
    res.iloc[:, 4:] = res.iloc[:, 4:]\
        .applymap(lambda v: pd.to_numeric(v.replace(",", "").replace(" ", "") if isinstance(v, str) else v, errors='coerce'))
    res['dt'] = res['DATE'].apply(lambda v: pd.to_datetime(v, format="%B, %Y", errors='coerce'))
    return res
# Charleston charting is currently disabled.
#df_ch = pd.concat([fixColumns(d) for d in dfs_ch[:2]])
#df_ch.head()
#doYoyChart(df_ch, metric='TEU-LOADED-EXPORT', location='Charleston')
#doYoyChart(df_ch, metric='TEU-LOADED-IMPORT', location='Charleston')
#doYoyChart(df_ch, metric='TEU-TOTAL', location='Charleston')
# Port of Virginia
# http://www.portofvirginia.com/who-we-are/port-stats/
#raise RuntimeError("Broken")
# Former curl-based discovery of the spreadsheet link (the page now renders
# via JavaScript, hence the Selenium path below).
#result = !curl -sL 'http://www.portofvirginia.com/who-we-are/port-stats/' | grep 'xlsx'
#currentReport = [u for u in re.findall('href="([^"]+)"', "".join(result)) if "xls" in u][0]
baseUrl = "http://www.portofvirginia.com/who-we-are/port-stats/"
def fetchCurrentReport():
    """Scrape the Port of Virginia stats page (JS-rendered) for the current
    statistics-spreadsheet URL using headless Chrome.

    Raises selenium.common.exceptions.TimeoutException when the page content
    doesn't appear within 15s.
    """
    driver = webdriver.Chrome(executable_path='/usr/local/bin/chromedriver',
                              options=chrome_options)
    try:
        driver.get(baseUrl)
        wait = WebDriverWait(driver, 15)
        #wait.until(ajaxComplete)
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#container")))
        reportLink = "#container > div > section > article > div > ul > li:nth-child(2) > a"
        # find_element_by_css_selector was removed in Selenium 4; use the
        # By-based API (By is already imported at the top of the file).
        return driver.find_element(By.CSS_SELECTOR, reportLink).get_attribute('href')
    finally:
        # Always shut the browser down — the original leaked a headless
        # Chrome process on every call (and on timeouts).
        driver.quit()
currentReport = fetchCurrentReport()
#print(currentReport)
oldUrl = "http://www.portofvirginia.com/excel/Port_of_Virginia_Statistics.xlsx"
url = currentReport #"https://web.archive.org/web/20191218025738if_/http://www.portofvirginia.com/excel/Port_of_Virginia_Statistics.xlsx"
df_va = pd.read_excel(url, skiprows=1)
df_va.columns = ['Month', 'Key'] + df_va.columns.to_list()[2:]
# Keep only the monthly section of the sheet (rows past 71 hold other data).
df_va = df_va.iloc[:71]
df_va = df_va.dropna(how='all')
# Each month label spans several Key rows; limit=4 avoids filling past a group.
df_va['Month'] = df_va['Month'].ffill(limit=4)
#df_va.head(20)
df_va_melted = df_va.melt(id_vars=['Month', 'Key'])
df_va_melted = df_va_melted[~df_va_melted['value'].isna()]
# Year comes from the spreadsheet column header ('variable' after melting).
df_va_melted['dt'] = pd.to_datetime(df_va_melted['Month'] + ' ' + df_va_melted['variable'].apply(str), format="%b %Y")
df_va_clean = df_va_melted.pivot(index='dt', columns='Key', values='value')
# NOTE(review): 'Virgina' (sic) appears in the rendered chart titles.
doYoyChart(df_va_clean, location='Virgina', metric='Export Empties')
doYoyChart(df_va_clean, location='Virgina', metric='Import Loads')
doYoyChart(df_va_clean, location='Virgina', metric='Export Loads')
doYoyChart(df_va_clean, location='Virgina', metric='Total TEUs')
# Port of Oakland
# https://www.oaklandseaport.com/performance/facts-figures/
# Historical monthly TEUs come from a published spreadsheet...
df_oakland = pd.read_excel("https://www.oaklandseaport.com/files/PDF/Oakland%20Monthly%20TEUs%201998-2021.xlsx",
    skiprows=2)
# Normalize headers like 'Import Full' to 'Import-Full' (non-letters -> '-').
df_oakland.columns = [re.sub("[^A-z]+", "-", c) for c in df_oakland]
df_oakland['dt'] = pd.to_datetime(df_oakland['Year'].apply(str) + df_oakland['Month'], format="%Y%B", errors='coerce')
df_oakland = df_oakland.dropna()
df_oakland = df_oakland.drop(['Year', 'Month'], axis=1)
df_oakland = df_oakland.set_index('dt')
#df_oakland.tail()
# ...while the current year's numbers are scraped from the facts-and-figures
# page's chart markup (IPython shell magic).
command = "curl -s 'https://www.oaklandseaport.com/performance/facts-figures/' | egrep 'class=\"(number|chart-subtitle)\"'"
df_oak_latest_res = !$command
df_oak_latest_raw = "".join(df_oak_latest_res)
def extractMonthAndValue(s):
    """Scrape (month, value) string pairs out of the Oakland stats-chart HTML."""
    return re.findall('title="([A-z]+)"><span class="number">([0-9,.]+)', s)
def getLabel(n):
    """Map a chart position index to its TEU series label.

    Indices 0-2 have fixed labels; anything else maps to 'Export-Empty'.
    """
    labels = {
        0: 'Import-Full',
        1: 'Export-Full',
        2: 'Import-Empty',
    }
    return labels.get(n, 'Export-Empty')
# Split the scraped HTML into per-chart sections; n-1 because the text before
# the first 'chart-subtitle' marker is not a chart.
df_oak_latest_stg = [(getLabel(n-1), extractMonthAndValue(v)) for n, v in enumerate(re.split('class="chart-subtitle"', df_oak_latest_raw)) if "number" in v]
currentYear = date.today().year
df_oak_latest = pd.DataFrame()
for kind, vals in df_oak_latest_stg:
    if len(vals) == 0:
        continue
    #print(kind, vals)
    tmp = pd.DataFrame.from_records(vals)
    # The page only shows the current year, so stamp months with it.
    tmp['dt'] = pd.to_datetime(f"{currentYear}-" + tmp.iloc[:, 0], format="%Y-%b")
    tmp.iloc[:, 1] = pd.to_numeric(tmp.iloc[:, 1].map(lambda v: v.replace(",", "")))
    tmp['type'] = tmp.index.map(lambda v: kind)
    tmp.columns = ['month', 'value', 'dt', 'type']
    df_oak_latest = pd.concat([df_oak_latest, tmp])
# Wide table matching the historical spreadsheet's column scheme, plus totals.
df_oak_latest_tab = df_oak_latest.pivot_table(index='dt', columns='type', values='value')
df_oak_latest_tab['Total-Full'] = df_oak_latest_tab['Import-Full'] + df_oak_latest_tab['Export-Full']
df_oak_latest_tab['Total-Empty'] = df_oak_latest_tab['Import-Empty'] + df_oak_latest_tab['Export-Empty']
df_oak_latest_tab['Grand-Total'] = df_oak_latest_tab['Total-Full'] + df_oak_latest_tab['Total-Empty']
#df_oak_latest_tab.head()
# Stitch the historical spreadsheet and the scraped current year together.
df_oak_combined = pd.concat([df_oakland, df_oak_latest_tab]).reset_index()
doYoyChart(df_oak_combined, metric='Total-Full', location='Oakland')
doYoyChart(df_oak_combined, metric='Total-Empty', location='Oakland')
doYoyChart(df_oak_combined, metric='Grand-Total', location='Oakland')