You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

121 lines
3.2 KiB

# %%
import pandas as pd
import requests
import matplotlib.pyplot as plt
import matplotlib
import json
from scipy import interpolate
import numpy as np
import datetime
# %% Use LaTeX, XXX might not need this
# plt.rc('text', usetex=True)
plt.rcParams['font.family'] = 'serif'
# Draw ticks, axis labels and axis edges in white.
params = {
    key: "w"
    for key in (
        "ytick.color",
        "xtick.color",
        "axes.labelcolor",
        "axes.edgecolor",
    )
}
plt.rcParams.update(params)
# %% Function to convert sizes in cm for figure size
def cm2inch(*tupl):
    """Convert lengths from centimetres to inches.

    Accepts either separate numbers (``cm2inch(15, 18)``) or a single tuple
    (``cm2inch((15, 18))``).

    Args:
        *tupl: Lengths in centimetres, or one tuple holding them. When the
            first argument is a tuple, only that tuple is converted.

    Returns:
        A tuple with the same lengths expressed in inches; an empty tuple
        when called without arguments.
    """
    cm_per_inch = 2.54
    # Guard the empty call: indexing tupl[0] would raise IndexError.
    if tupl and isinstance(tupl[0], tuple):
        tupl = tupl[0]
    return tuple(length / cm_per_inch for length in tupl)
# %% Countries
# Keyed by the country code that is inserted into the request URL; each
# value holds that country's display name.
countries = dict(uk=dict(name='UK'))
# %% Data
# Download the full history for every configured country, derive the daily
# and averaged series, and store the result on the country's entry.
for country_code, country_data in countries.items():
    request_url = (
        'https://disease.sh/v3/covid-19/historical/'
        + country_code
        + '?lastdays=all'
    )
    # Bound the wait and fail loudly on an HTTP error; otherwise an error
    # response would only surface later as a confusing failure when
    # reading 'timeline'.
    response = requests.get(request_url, timeout=30)
    response.raise_for_status()
    payload = response.json()
    df = pd.DataFrame(payload['timeline'])
    df.index = pd.to_datetime(df.index)
    # Process data
    # .abs() is a dirty trick to prevent negative outliers
    df['daily_cases'] = df['cases'].diff().abs()
    df['daily_cases_avg'] = df['daily_cases'].rolling(7).mean()
    df['cases_change'] = df['daily_cases_avg'].diff()
    # Smoothing: upsample to a 4-hour grid, then fill the new rows with a
    # fifth-order spline.
    # NOTE(review): newer pandas releases prefer the lowercase '4h' alias;
    # confirm against the pandas version in use.
    df = df.resample('4H').asfreq()
    df = df.interpolate(method='spline', order=5)
    country_data['dataframe'] = df
# %%
# cutoff date: drop every row whose timestamp is later than 2020-09-09
is_after_cutoff = df.index > '2020-09-09'
df = df[~is_after_cutoff]
# %%
# Load a previously pickled dataframe and rebuild the country table around
# it, so the plotting cell can run without downloading the data again.
df = pd.read_pickle("2020-09-09_data.pkl")
countries = {'uk': dict(name='UK', dataframe=df)}
# %% Plotting
# Draw daily cases (y) against their day-to-day change (x) for every country,
# label the start of each month along the curve, and save the figure as PDF.
fig, ax = plt.subplots(figsize=cm2inch(15, 18))
for country_code, country_data in countries.items():
    # Use this country's own dataframe; the previous `df = df` was a no-op
    # that plotted whatever `df` was left over from an earlier cell.
    df = country_data['dataframe']
    line, = ax.plot(
        df['cases_change'],
        df['daily_cases_avg'],
        lw=0.5, c='#00FF00', label=country_data['name'])
    # select which dates to label: the first sample of every month
    df['month'] = df.index.month
    df['month_change'] = df['month'].diff()
    # +1 is an ordinary month step; -11 is the December -> January rollover,
    # which a plain `== 1` comparison misses.
    month_start = df['month_change'].isin([1, -11])
    mar_onwards = df['month'] >= 1  # used to be 3 to cut labels in 2020
    labeldates = pd.concat([df[month_start & mar_onwards], df.tail(1)])
    # date labels
    for timestamp, row in labeldates.iterrows():
        date_text = timestamp.strftime('%d/%m')
        ax.annotate(date_text,
                    xy=(row['cases_change'], row['daily_cases_avg']),
                    xycoords='data',
                    xytext=(28, -2),
                    textcoords='offset points',
                    ha='right',
                    size=8,
                    color='w')
        # to use same color as current curve: color=line.get_color()
        # to add background: bbox=dict(boxstyle='square, pad=0.5', alpha=0.7, fc='white', ec='white')
        # date markers
        ax.scatter(row['cases_change'], row['daily_cases_avg'], color='w', s=10)
# Date legend: a sample marker plus the explanatory text
ax.scatter(1250, 0, color='w', s=10)
ax.annotate('Dates: day/month 2020/21', xy=(220, 12), xycoords='axes points', color='w')
# Line at x=0
ax.axvline(x=0, c='white', lw=1, ls=':')
# remove frame
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.set(ylabel="UK Daily COVID-19 Cases", xlabel="Increase or decrease in cases per day")
#ax.legend(loc='upper center', ncol=3, bbox_to_anchor=(0.5,1.15))
#plt.show()
fig.savefig('tornado_plot.pdf', transparent=True)
# %%
# %%