import pandas as pd
import altair as alt
import numpy as np
import re
# https://www.census.gov/foreign-trade/statistics/historical/index.html
# -> U.S. International Trade In Goods
# Data not currently used
"""
df = pd.read_excel("http://www.census.gov/foreign-trade/statistics/historical/goods.xls", skiprows=3) #.iloc[1:]
subCategories = ["BOP Basis", "BOP Change", "Census Basis", "Census Change"]
categories = [list(zip([c] * 4, subCategories)) for c in filter(lambda v: v == v, df.iloc[0].to_numpy())]
cats = [v for nested in categories for v in nested]
columns = list(map(lambda t: t if isinstance(t, str) else ' '.join(t), ['Period'] + cats))
df.columns = columns
df = df.iloc[2:-3]
df['Period'] = df['Period'].apply(str)
#df.head(30)
#df.tail()
c = alt.Chart(df.iloc[-30:]).mark_bar().encode(
alt.X('Period:T'),
alt.Y('Balance BOP Change:Q')
)
cc = alt.Chart(df.iloc[-30:]).mark_bar().encode(
alt.X('Period:T'),
alt.Y('Exports BOP Change:Q')
)
ccc = alt.Chart(df.iloc[-30:]).mark_bar().encode(
alt.X('Period:T'),
alt.Y('Imports BOP Change:Q')
)
""";
# https://www.census.gov/foreign-trade/balance/c0015.html
dfb = pd.read_excel("https://www.census.gov/foreign-trade/balance/country.xlsx", engine='openpyxl')
# dfb.tail()
# dfb.CTYNAME.unique()
dfb_t = dfb[dfb.CTYNAME.isin(['World, Not Seasonally Adjusted'])].replace(0, np.nan)
# Drop
dfb_t = dfb_t.drop(['CTY_CODE', 'CTYNAME', 'IYR', 'EYR'], axis=1)
dfb_t = dfb_t[~dfb_t['year'].isin(['IYR', 'EYR'])]
dfb_melted = dfb_t.melt(id_vars='year')
dfb_melted['Flow'] = dfb_melted['variable'].map(lambda v: "Import" if v[0] == "I" else "Export")
dfb_melted['Month'] = dfb_melted['variable'].map(lambda v: v[1:])
dfb_melted['dt'] = pd.to_datetime(dfb_melted['year'].apply(int).apply(str) + ' ' + dfb_melted['Month'], format='%Y %b')
dfb = dfb_melted.pivot(index='dt', columns='Flow', values='value')
dfb['Balance'] = dfb['Export'] - dfb['Import']
dfb_ts = dfb.reset_index().melt(id_vars='dt')
alt.Chart(dfb_ts.dropna()).mark_line().encode(
alt.X('dt:T', axis=alt.Axis(title='')),
alt.Y('value:Q', axis=alt.Axis(title='Amount [Million USD]')),
alt.Color('Flow'),
tooltip=[alt.Tooltip('Flow'), alt.Tooltip('dt:T', format="%b %Y"), alt.Tooltip('value:Q')]
).properties(
title='Monthly US Trade Volumes',
height=450,
width=700
)
def doRidgeLineFor(df, x, y, row, title='Import Growth By Category', hover='% Change', overlap=2, step=100):
return alt.Chart(df, height=step).transform_joinaggregate(
mean_val=f'mean({x})', groupby=[row]
).mark_area(
interpolate='monotone',
fillOpacity=0.8,
stroke='lightgray',
strokeWidth=0.5
).encode(
alt.X(f'{x}:T', title=''),
alt.Y(
f'{y}:Q',
scale=alt.Scale(range=[step, -step * overlap]),
axis=None
),
alt.Fill(
f'{row}:N',
title='Category',
#legend=None,
scale=alt.Scale(scheme='dark2')
),
tooltip=[alt.Tooltip(f'{x}:T'), alt.Tooltip(f'{row}:N'), alt.Tooltip(f'{y}:Q', title=hover, format=',.02f')]
).facet(
row=alt.Row(
f'{row}:N',
title=None,
header=alt.Header(labelColor='white'), #, labelAnchor='end')
),
).properties(
title=title,
bounds='flush'
).configure_facet(
spacing=0,
).configure_view(
stroke=None,
continuousWidth=700,
).configure_title(
anchor='middle'
)
#categories_yoy = adj_categories_agg.copy().set_index('dt')[['variable', 'value_adj']].pivot(columns='variable')\
# .applymap(float).pct_change(4).applymap(lambda v: v * 100).reset_index().melt(id_vars='dt')
dfb_yoy = dfb_ts.set_index(['dt', 'Flow'])['value'].pct_change(12).apply(lambda v: v * 100)\
.reset_index().melt(id_vars=['dt', 'Flow'])
dfb_yoy = dfb_yoy[(dfb_yoy['dt'] > '1989-01-01')].dropna()
#dfb_yoy.dropna().tail()
"""alt.Chart(dfb_yoy[dfb_yoy['dt'] > '1994-01-01'].dropna()).mark_bar(width=1.5).encode(
alt.X('dt:T', axis=alt.Axis(title='')),
alt.Y('yoy:Q', axis=alt.Axis(title='Year-over-Year Growth [% change]')),
alt.Color('Flow:N'),
alt.Row('Flow:N'),
tooltip=[alt.Tooltip('dt:T', format="%b %Y"), alt.Tooltip('Flow:N'), alt.Tooltip('yoy:Q')]
).transform_calculate(
yoy='datum.value * 100'
).properties(
title='US Trade Growth',
height=450,
width=700
)"""
doRidgeLineFor(dfb_yoy[['dt', 'Flow', 'value']].dropna(), 'dt', 'value', 'Flow', title='US Trade Growth')
# https://www.census.gov/foreign-trade/statistics/historical/index.html
def parseTradeTimeseries(url="https://www.census.gov/foreign-trade/statistics/historical/NSAIMP.xls"):
df_tmp = pd.read_excel(url, skiprows=4).iloc[:-4].dropna(how='all')
df_tmp.columns = ['Month', 'Total'] + [c.replace(".", "") for c in df_tmp.columns.to_list()[2:]]
# Find the latest year in the dataset
latestYear = df_tmp[df_tmp.Month.apply(lambda v: True if isinstance(v, int) else False)]['Month'].max()
# Convert the months for each record into valid timestamps, using the year marker row
years = list(reversed(range(1992, latestYear + 1)))
locs = df_tmp.set_index('Month').index.isin(years)
n = 0
dated = pd.Series(index=df_tmp.index, dtype='object')
for i, y in enumerate(locs):
#print(df_imcat.iloc[y])
if y == True:
currentYear = years[n]
n = n + 1
dated.iloc[i] = np.nan
continue
else:
dated.iloc[i] = pd.to_datetime(df_tmp.iloc[i]['Month'] + " " + str(currentYear), format="%B %Y")
df_tmp['dt'] = dated
return df_tmp
df_imcat = parseTradeTimeseries()
df_immelt = df_imcat[df_imcat.columns.to_list()[2:]].melt(id_vars='dt').dropna()
alt.Chart(df_immelt).mark_bar(size=2).encode(
alt.X('dt:T', axis=alt.Axis(title='')),
alt.Y('sum(value):Q', axis=alt.Axis(title='Million USD')),
alt.Color('variable:N', title='Import Category', scale=alt.Scale(scheme='accent')),
tooltip=[alt.Tooltip('dt:T', format="%b %Y", title='Date'), alt.Tooltip('variable:N', title='Category'),
alt.Tooltip('value:Q', title='Million USD',format='$,.0d')]
).properties(
title='US Imports by Category',
width=750,
height=400
)
df_imyoy = df_imcat.iloc[:, 2:].dropna().set_index('dt').sort_index().pct_change(12).apply(lambda v: v * 100).reset_index()
df_imyoymelt = df_imyoy.melt(id_vars='dt').dropna()
"""alt.Chart(df_imyoymelt).mark_bar(width=1.5).encode(
alt.X('dt:T', axis=alt.Axis(title='')),
alt.Y('value:Q', axis=alt.Axis(title='Year-over-Year Growth [% change]')),
alt.Color('variable:N', title='Export Category'),
alt.Row('variable:N'),
tooltip=[alt.Tooltip('dt:T', format="%b %Y"), alt.Tooltip('variable:N'), alt.Tooltip('value:Q')]
).properties(
title='US Import Growth by Category',
width=750,
height=400
)"""
doRidgeLineFor(df_imyoymelt[['dt', 'variable', 'value']].dropna(), 'dt', 'value', 'variable')
df_excat = parseTradeTimeseries("https://www.census.gov/foreign-trade/statistics/historical/NSAEXP.xls")
df_exmelt = df_excat[df_excat.columns.to_list()[2:]].melt(id_vars='dt').dropna()
c = alt.Chart(df_exmelt).mark_bar(size=2).encode(
alt.X('dt:T', axis=alt.Axis(title='')),
alt.Y('sum(value):Q', axis=alt.Axis(title='Million USD')),
alt.Color('variable:N', title='Export Category', scale=alt.Scale(scheme='accent')),
tooltip=[alt.Tooltip('dt:T', format="%b %Y", title='Date'), alt.Tooltip('variable:N', title='Category'),
alt.Tooltip('value:Q', title='Million USD',format='$,.0d')]
).properties(
title='US Exports by Category',
width=750,
height=400
)
c.save('international-trade.png')
c.display()
df_exyoy = df_excat.iloc[:, 2:].dropna().set_index('dt').sort_index().pct_change(12).apply(lambda v: v * 100).reset_index()
df_exyoymelt = df_exyoy.melt(id_vars='dt').dropna()
"""alt.Chart(df_exyoymelt).mark_bar(width=2).encode(
alt.X('dt:T', axis=alt.Axis(title='')),
alt.Y('value:Q', axis=alt.Axis(title='Year-over-Year Growth [%]')),
alt.Color('variable:N', title='Export Category'),
alt.Row('variable:N'),
tooltip=[alt.Tooltip('dt:T', format="%b %Y"), alt.Tooltip('variable:N'), alt.Tooltip('value:Q')]
).properties(
title='US Export Growth by Category',
width=750,
height=400
)"""
doRidgeLineFor(df_exyoymelt[['dt', 'variable', 'value']].dropna(), 'dt', 'value', 'variable', title='US Export Growth by Category')
df_balmelt = df_exmelt.set_index(['dt', 'variable']).join(df_immelt.set_index(['dt', 'variable']), rsuffix='im')
df_balmelt['net'] = df_balmelt['value'] - df_balmelt['valueim']
df_balmelt = df_balmelt.reset_index()
doRidgeLineFor(df_balmelt[['dt', 'variable', 'net']].dropna(), 'dt', 'net', 'variable',
title='US Trade Balance by Category', overlap=1, step=80, hover='Million USD')