How far is the US from herd immunity?
The goal of this notebook is to help myself understand the path of different US states towards herd immunity towards COVID-19. We always see charts about active and cumulative cases alone or jsut vaccinations alone. I wanted to bring them together to understand the numbers who are in some way immune (fought COVID-19 and survived or received a vaccine shot)
- State populations
- State abbreviations
- Reading US vaccine data
- Reading Covid Case counts data
- How to take into account the undetected positive cases?
I am using three different sources to do the analysis.
- US COVID-19 vaccination data
- US state population data
- US COVID-19 daily cases data
!pip install hvplot
!apt-get install libgeos++ libproj-dev
!pip install geoviews
!pip install geopandas
import pandas as pd
import numpy as np
import geopandas as gpd
from datetime import datetime,timedelta
import hvplot.pandas
import holoviews as hv
#import matplotlib.pyplot as plt
import panel as pn
from panel.interact import interact, interactive
from panel import widgets
from datetime import datetime
us_states_population_data_source = "https://raw.githubusercontent.com/COVID19Tracking/associated-data/master/us_census_data/us_census_2018_population_estimates_states.csv"
us_state_populations = pd.read_csv(us_states_population_data_source)
us_state_populations.head()
state_abbrev_source = "https://raw.githubusercontent.com/jasonong/List-of-US-States/master/states.csv"
state_codes = pd.read_csv(state_abbrev_source)
state_codes.head()
Source for this data is wor world in data's github page. I am using the
us_vaccination_data_path = "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/us_state_vaccinations.csv"
us_vaccination_data = pd.read_csv(us_vaccination_data_path)
us_vaccination_data.head()
us_vaccination_data_w_state_code = us_vaccination_data.merge(state_codes,
how='inner',
left_on="location",
right_on="State")
us_vaccination_data_w_state_code[us_vaccination_data_w_state_code["people_vaccinated"].isna()]
us_vaccination_data_w_state_code["date"] = pd.to_datetime(us_vaccination_data_w_state_code["date"])
hv.extension('bokeh')
us_vaccination_data_w_state_code.hvplot.line(x='date',
y='people_vaccinated',
by="State",
rot=90,
height=500,
width=900)
us_vaccination_data.shape
us_vaccination_data["location"].unique(), us_vaccination_data["location"].nunique()
us_vaccination_data_w_state_code_latest_date = us_vaccination_data_w_state_code[us_vaccination_data_w_state_code["date"]==us_vaccination_data_w_state_code["date"].max()]
hv.extension('bokeh')
us_vaccination_data_w_state_code_latest_date.hvplot.bar(x="Abbreviation",
y="people_vaccinated",
rot=90)
hv.extension('bokeh')
us_vaccination_data_w_state_code_latest_date.sort_values(by="total_vaccinations_per_hundred",ascending=False).hvplot.bar(x="State",
y="total_vaccinations_per_hundred",
height=700,
width = 1500,
rot=90)
Alaska and West Virginia are killing it in vaccinating their populations
def get_us_states_latest_case_report_path(del):
date_str_for_jhu_daily_report = (datetime.now()-timedelta(days=del)).strftime("%m-%d-%Y")
us_states_covid_daily_report_path = ("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports_us/{}.csv".format(date_str_for_jhu_daily_report))
return us_states_covid_daily_report_path
try:
us_states_latest_case_data = pd.read_csv(get_us_states_latest_case_report_path(del=1))
except:
us_states_latest_case_data = pd.read_csv(get_us_states_latest_case_report_path(del=2))
us_states_latest_case_data.head()
us_states_latest_case_data.shape
hv.extension('bokeh')
us_states_latest_case_data.sort_values(by="Confirmed",
ascending=False).hvplot.bar(x="Province_State",
y="Confirmed",
width=1000,
height = 600,
rot=90)
us_states_latest_case_data["Recovered"].isna().sum()
hv.extension('bokeh')
us_states_latest_case_data.sort_values(by="Recovered",
ascending=False).hvplot.bar(x="Province_State",
y="Recovered",
width=1000,
height = 600,
rot=90)
hv.extension('bokeh')
us_states_latest_case_data.sort_values(by="Case_Fatality_Ratio",
ascending=False).hvplot.bar(x="Province_State",
y="Case_Fatality_Ratio",
width=1000,
height = 600,
rot=90)
us_states_latest_case_data_gdf = (gpd.GeoDataFrame(us_states_latest_case_data,
geometry=gpd.points_from_xy(us_states_latest_case_data.Long_,us_states_latest_case_data.Lat))
)
us_states_latest_case_data_gdf.head()
us_states_latest_case_data_gdf = us_states_latest_case_data_gdf.merge(state_codes,
how="inner",
left_on="Province_State",
right_on="State")
us_states_latest_case_data_gdf.shape
us_states_latest_case_data_gdf.head()
import plotly.graph_objects as go
geo_df = us_states_latest_case_data_gdf.copy()
fig = go.Figure(data=go.Choropleth(locations=geo_df["Abbreviation"],
locationmode="USA-states",
z=geo_df["Confirmed"],
colorscale = "Reds",
colorbar_title="Cumulative positive cases"))
#fig.update_geos(fitbounds="locations", visible=False)
fig.update_layout(
title_text = 'Total COVID-19 cases by State',
geo_scope='usa', # limite map scope to USA
)
fig.show()
fig = go.Figure(data=go.Choropleth(locations=geo_df["Abbreviation"],
locationmode="USA-states",
z=geo_df["Recovered"],
colorscale = "Greens",
colorbar_title="Cumulative positive cases"))
#fig.update_geos(fitbounds="locations", visible=False)
fig.update_layout(
title_text = 'Total COVID-19 cases by State',
geo_scope='usa', # limite map scope to USA
)
fig.show()
us_states_latest_case_data_gdf_w_population = us_states_latest_case_data_gdf.merge(us_state_populations,
how="left",
left_on="Abbreviation",
right_on="state")
us_states_latest_case_data_gdf_w_population.head()
us_vaccination_data_w_state_code_latest_date.query("Abbreviation=='NY'")
us_states_latest_case_data_gdf_w_population_and_vaccination = us_states_latest_case_data_gdf_w_population.merge(us_vaccination_data_w_state_code_latest_date,
how="inner",
on="Abbreviation")
us_states_latest_case_data_gdf_w_population_and_vaccination.shape
us_states_latest_case_data_gdf_w_population_and_vaccination.head()
def impute_recovered_data(row):
"""
In case of recovered data is not available,
we can use the CFR data to calculate the expected recoveries
"""
if pd.isnull(row["Recovered"]):
return row["Confirmed"]*(1-(row["Case_Fatality_Ratio"]/100))
else:
return row["Recovered"]
us_states_latest_case_data_gdf_w_population_and_vaccination["Recovered_"] = us_states_latest_case_data_gdf_w_population_and_vaccination.apply(impute_recovered_data,axis=1)
us_states_latest_case_data_gdf_w_population_and_vaccination["Recovered_"].isna().sum()
us_states_latest_case_data_gdf_w_population_and_vaccination["expected_immune_population"] = (us_states_latest_case_data_gdf_w_population_and_vaccination["Recovered_"] +
us_states_latest_case_data_gdf_w_population_and_vaccination["people_vaccinated"])
us_states_latest_case_data_gdf_w_population_and_vaccination["expected_immune_population_p_100"] = (us_states_latest_case_data_gdf_w_population_and_vaccination["expected_immune_population"]/
us_states_latest_case_data_gdf_w_population_and_vaccination["population"])*100
def plot_choropeth_usa(geo_df,col,colorscale,title):
fig = go.Figure(data=go.Choropleth(locations=geo_df["Abbreviation"],
locationmode="USA-states",
z=geo_df[col],
colorscale = colorscale,
colorbar_title=col))
#fig.update_geos(fitbounds="locations", visible=False)
fig.update_layout(
title_text = title,
geo_scope='usa', # limite map scope to USA
)
fig.show()
us_states_latest_case_data_gdf_w_population_and_vaccination.query("state=='NY'")
plot_choropeth_usa(geo_df=us_states_latest_case_data_gdf_w_population_and_vaccination,
col="expected_immune_population_p_100",
colorscale="Greens",
title="The percentage of population immune to COVID-19 in each state as of {}".format(datetime.now()))
us_states_latest_case_data_gdf_w_population_and_vaccination.head()