library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.4     v purrr   0.3.4
## v tibble  3.1.2     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(knitr)
library(readxl)
library(zoo)

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(ggplot2)
library(ggthemes)
library(sf)

## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1

Data

url =  'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
covid = read_csv(url)

## 
## -- Column specification --------------------------------------------------------
## cols(
##   date = col_date(format = ""),
##   county = col_character(),
##   state = col_character(),
##   fips = col_character(),
##   cases = col_double(),
##   deaths = col_double()
## )

landDate = read.csv("../data/landdata-states.csv")
PopulationEstimates = read_excel("../data/PopulationEstimates.xls", skip=2)

Question 1

state.of.interest = "California"

# County Level
covid %>%
  filter(state == state.of.interest, date == max(date)) %>%
  slice_max(cases, n=5) %>% 
knitr::kable(caption = "Counties with the Most COVID Cases")

Counties with the Most COVID Cases
date	county	state	fips	cases	deaths
2021-07-29	Los Angeles	California	06037	1294135	24675
2021-07-29	Riverside	California	06065	315080	4659
2021-07-29	San Diego	California	06073	311011	3798
2021-07-29	San Bernardino	California	06071	309779	5236
2021-07-29	Orange	California	06059	281790	5143

covid %>% 
  filter(state == state.of.interest) %>% 
  group_by(county) %>% 
  mutate(newCases = cases - lag(cases)) %>% 
  ungroup() %>%
  filter(date == max(date)) %>% 
  slice_max(newCases, n=5) %>% 
knitr::kable(caption = "Top 5 Counties with the Most COVID Cases")

Top 5 Counties with the Most COVID Cases
date	county	state	fips	cases	deaths	newCases
2021-07-29	Los Angeles	California	06037	1294135	24675	3224
2021-07-29	San Diego	California	06073	311011	3798	768
2021-07-29	San Bernardino	California	06071	309779	5236	616
2021-07-29	Riverside	California	06065	315080	4659	594
2021-07-29	Orange	California	06059	281790	5143	562

pop = PopulationEstimates %>% 
  select("Area_Name", population = "POP_ESTIMATE_2019", fips = "FIPStxt")

covidpop = inner_join(covid, pop, by = "fips")

covidpop %>% 
  filter(state == state.of.interest, date == max(date)) %>% 
  mutate(perCapita= cases/population) %>% 
  slice_max(perCapita, n = 5) %>% 
knitr::kable(caption = "Top 5 Cases Per Capita")

Top 5 Cases Per Capita
date	county	state	fips	cases	deaths	Area_Name	population	perCapita
2021-07-29	Lassen	California	06035	5876	26	Lassen County	30573	0.1921957
2021-07-29	Imperial	California	06025	32040	742	Imperial County	181215	0.1768066
2021-07-29	Kings	California	06031	24773	250	Kings County	152940	0.1619786
2021-07-29	San Bernardino	California	06071	309779	5236	San Bernardino County	2180085	0.1420949
2021-07-29	Tulare	California	06107	60441	854	Tulare County	466195	0.1296475

covidpop %>% 
  filter(state == state.of.interest) %>% 
  group_by(county, state) %>% 
  filter(date > (max(date)-14)) %>% 
  summarise(min_cases = min(cases), max_cases = max(cases), population = mean(population)) %>% 
  mutate(diff = max_cases-min_cases) %>% 
  ungroup() %>% 
  mutate(NewperCap = diff/population) %>% 
  slice_max(NewperCap, n = 5) %>% 
knitr::kable(caption = "Top 5 New Cases Per Capita")

## `summarise()` has grouped output by 'county'. You can override using the `.groups` argument.

Top 5 New Cases Per Capita
county	state	min_cases	max_cases	population	diff	NewperCap
Lake	California	3774	4204	64386	430	0.0066785
Del Norte	California	1522	1643	27812	121	0.0043506
Yuba	California	6691	6985	78668	294	0.0037372
Tuolumne	California	4322	4507	54478	185	0.0033959
Sacramento	California	112421	117054	1552058	4633	0.0029851

Question 2

states.of.interest <- c("New York", "California", "Louisiana", "Florida")

# State level
StateLevel <- covid %>% 
  filter(state %in% states.of.interest) %>% 
  group_by(state, date) %>% 
  summarise(cases = sum(cases)) %>% 
  mutate(newCases = cases - lag(cases),
         roll7 = rollmean(newCases, 7, fill = NA, align="right")) %>% 
  ungroup()

## `summarise()` has grouped output by 'state'. You can override using the `.groups` argument.

ggplot(data = StateLevel, aes(x = date, y = roll7, group = state)) +
  geom_line(aes(col=state)) +
  labs( x = "Date", 
        y = "Daily New Cases", 
        title = "COVID-19 Cases in Ca, Ny, La, Fl",
        subtitle = "7 Day Rolling Mean",
        caption = "Source: New York Times") +
  facet_grid(~state) +
  theme_light() +
  theme(plot.background = element_rect(fill = "white"),
          panel.background = element_rect(fill = "white"),
          plot.title = element_text(size = 14, face = 'bold')) +
  theme(aspect.ratio = 0.75) +
  theme(axis.text.x = element_text(angle = 90))

## Warning: Removed 28 row(s) containing missing values (geom_path).

stateCovid = inner_join(pop, covid, by = "fips")

stateCovid %>% 
  filter(state %in% c("California", "Florida", "Louisiana", "New York")) %>% 
  group_by(state, date) %>% 
  summarise(cases = sum(cases)) %>% 
  mutate(NewCases = cases - lag(cases),
         roll7 = rollmean(NewCases, 7, fill = NA, align="right")) %>% 
  ungroup()

## `summarise()` has grouped output by 'state'. You can override using the `.groups` argument.

## # A tibble: 2,089 x 5
##    state      date       cases NewCases  roll7
##    <chr>      <date>     <dbl>    <dbl>  <dbl>
##  1 California 2020-01-25     1       NA NA    
##  2 California 2020-01-26     2        1 NA    
##  3 California 2020-01-27     2        0 NA    
##  4 California 2020-01-28     2        0 NA    
##  5 California 2020-01-29     2        0 NA    
##  6 California 2020-01-30     2        0 NA    
##  7 California 2020-01-31     3        1 NA    
##  8 California 2020-02-01     3        0  0.286
##  9 California 2020-02-02     6        3  0.571
## 10 California 2020-02-03     6        0  0.571
## # ... with 2,079 more rows

Question 3 (extra credit)

#counties = USAboundaries::us_counties() %>% 
 # select("fips" = geoid, name, state_name) %>% 
  #st_centroid() %>% 
  #mutate(LON = st_coordinates(.)[,1], 
   #      LAT = st_coordinates(.)[,2]) %>% 
#  st_drop_geometry()

Geography 13

Lab 02: COVID-19 Pandemic

Haylee Damron

Data

Question 1

Question 2

Question 3 (extra credit)