Above are some new county-level heat maps of police shooting data, compiled from the Lott & Moody (2016) data for years 2013-2015, and the Washington Post for 2016. Below the graphs, the R code is provided.
R Code
1. Load the Data.
(I plan to upload these data sets to figshare shortly.)
library(readr)
library(tidyr)
library(dplyr)
vars <- fst::read.fst(path = "countyVars.csv")
df_wapoLott <- dplyr::left_join(vars, readr::read_csv("killed_lott_wapo.csv")) %>%
dplyr::mutate_at(vars(contains("killed")), funs(replace(., is.na(.), 0))) %>%
dplyr::mutate(killDummy = ifelse(killed == 0, "NO", "YES"), killDummy = factor(killDummy),
killRate = killed/pop.c.total_5yr, policeRatio = policeFullTime/pop.c.total_5yr,
crowding_prcnt = log(crowding_prcnt + 1), bw_income_gap = medianIncome_black/medianIncome_white_nh,
black_sq = (pop.c.black_5yr/pop.c.total_5yr)^2, pop = log(pop.c.total_5yr),
pop.blackPrcnt_log = log(pop.c.black_5yr/pop.c.total_5yr), pop.c.hispanic_5yr = log(pop.c.hispanic_5yr/pop.c.total_5yr),
pop.male.15_29_5yr = log(pop.male.15_29_5yr/pop.c.total_5yr))
crimeRateVars <- c("pc1Crime", "pc2Crime", "pc1Crime_3", "pc2Crime_3", "murder_r_4yr",
"dui_r_4yr", "drug_poss_r_4yr", "vc_r_4yr", "theft_r_4yr", "vandal_r_4yr")
crimeCountVars <- c("murder_4yr", "dui_4yr", "drug_poss_4yr", "vc_4yr", "theft_4yr",
"vandal_4yr")
sesVars <- c("medianIncome", "bw_income_gap", "gini", "unemploymentRate_5yr",
"singleParent_prcnt", "poverty", "sesall", "pc1SES", "pc2SES", "highschoolDropOut",
"ELA_6thgrade", "MATH_6thgrade", "residential_segregation_black_white",
"theil_whiteBlack")
popVars <- c("FIPS", "pop", "pop.c.total_5yr", "pop.blackPrcnt_log", "pop.c.black_5yr",
"black_sq", "pop.c.black_change", "pop.c.hispanic_5yr", "pop.male.15_29_5yr",
"policePrcntBlack", "policeRatio", "policeFullTime_log", "policeFullTime",
"crowding_prcnt")
depVars <- c("killRate", "killed", "killDummy")
allVars <- c(depVars, crimeRateVars, crimeCountVars, sesVars, popVars)
df_all <- df_wapoLott[, allVars]
Figure 1.
library(choroplethr)
library(ggplot2)
library(ggthemes)
library(choroplethrMaps)
library(RColorBrewer)
library(ggsci)
df <- df_all
data(state.map)
data(county.map)
df$value <- df$killed #set some variable to 'value' - can reset later
df$region <- as.numeric(substr(df$FIPS, start=2, stop=6))
mycolors <- ggsci::pal_d3(palette = "category10")
nColors <- mycolors(n=4)
nColors <- c( "ivory", "cyan", nColors)
sc <- scale_fill_gradientn(colours = nColors,
limits=c(0,1,10,20,30,50,100,200))
df$value<- cut(df$killed,
breaks=c(-1,0,1,10,50,100,200),
labels=c("None", "1", "2-10", "11-50", "51-100",
"> 100")
)
##To get Alaska and Hawaii to show up correctly...
#see http://stackoverflow.com/questions/38938565/alaska-and-hawaii-not-formatting-correctly-for-county-choropleth-map-in-r
choro = CountyChoropleth$new(df[which(!is.na(df$value)),])
choro$ggplot_scale = scale_fill_manual(values=nColors,
labels=c("None", "1", "2-10", "11-50", "51-100","> 100"),
na.value= "ivory"
)
newdf <- choro$render()
newdf$data$value[which(is.na(newdf$data$value))] <- "None"
sum_df <- newdf$data[] %>%
tibble::as_tibble() %>%
group_by(region) %>%
dplyr::summarise(
name = first(NAME),
long_av = mean(long),
lat_av = mean(lat)
)
df <- dplyr::left_join(df, sum_df)
df$popLabel <- paste0(round(df$pop.c.total_5yr/1e06,2), "M")
newdf +
theme_minimal() +
guides(fill=guide_legend(title = "Civilians Killed")) +
scale_fill_manual(values=nColors,
breaks = c("1", "2-10", "11-50", "51-100",
"> 100")) +
ggrepel::geom_label_repel(data= filter(df, killed > 50),
aes(label= paste0(name, " County",
"\n(n=", killed,
", pop. ", popLabel,
")"),
x=long_av, y=lat_av,
group=region),
min.segment.length = unit(0.1, "lines"),
size=3,
nudge_x = .75, nudge_y = -.5, fontface="bold",
lineheight=.85,
fill="white", alpha=0.9) +
theme(legend.position = c(0.9, 0.2),
legend.title = element_text(face="bold"),
text = element_text(face="bold"),
axis.text.x=element_blank(),
axis.text.y=element_blank()
) +
xlab("") +
ylab("") +
ggtitle(label="Civilians Killed by Police per County (2013-2016)",
subtitle = "Data from Lott & Moody (2013-2015) and Washington Post (2016).\nSource: johnbradford.github.io")
Figure 2.
library(choroplethr)
library(ggplot2)
library(ggthemes)
library(choroplethrMaps)
library(RColorBrewer)
library(ggsci)
df <- df_all
data(state.map)
data(county.map)
df$value <- df$killRate #set some variable to 'value' - can reset later
df$region <- as.numeric(substr(df$FIPS, start=2, stop=6))
mycolors <- ggsci::pal_d3(palette = "category10")
nColors <- mycolors(n=4)
nColors <- c( "ivory", "cyan", nColors)
#c("antiquewhite", "beige", "ghostwhite", "floralwhite", "ivory")
df$value<- cut(1e05*df$killRate,
breaks=c(-1,0,1,10,25,50,100),
labels=c("None", "Less than 1", "1-10", "10-25", "25-50", "> 50")
)
#Map
choro = CountyChoropleth$new(df[which(!is.na(df$value)),])
choro$ggplot_scale = scale_fill_manual(values=nColors,
labels=c("None", "Less than 1", "1-25", "25-50", "> 50"),
na.value= "ivory"
)
newdf <- choro$render()
newdf$data$value[which(is.na(newdf$data$value))] <- "None"
sum_df <- newdf$data[] %>%
tibble::as_tibble() %>%
group_by(region) %>%
dplyr::summarise(
name = first(NAME),
long_av = mean(long),
lat_av = mean(lat)
)
df <- dplyr::left_join(df, sum_df)
df$rateLabel <- round(1e05*df$killRate)
newdf +
theme_minimal() +
guides(fill=guide_legend(title = "Civilians Killed per 100,000")) +
scale_fill_manual(values=nColors,
breaks = c("Less than 1", "1-10", "10-25", "25-50", "> 50")) +
ggrepel::geom_label_repel(data= filter(df, 1e05*killRate > 50),
aes(label= paste0(name, " County", "\n", rateLabel, " per 100k",
"\n(n=", killed,
", pop. ", round(pop.c.total_5yr),
")"),
x=long_av, y=lat_av, group=region),
min.segment.length = unit(0.1, "lines"),
size=3, nudge_x = .75, nudge_y = -.5, fontface="bold",
lineheight=.85,
fill="white", alpha=0.9
) +
theme(legend.position = c(0.9, 0.2),
legend.title = element_text(face="bold"),
text = element_text(face="bold"),
axis.text.x=element_blank(),
axis.text.y=element_blank()
) +
xlab("") +
ylab("") +
ggtitle(label="Civilians Killed by Police per 100,000 Residents (2013-2016)",
subtitle = "Data from Lott & Moody (2013-2015), Washington Post (2016), and US Census 5-year county population averages (2011-2015).\nSource: johnbradford.github.io")
Figure 3.
library(choroplethr)
library(ggplot2)
library(ggthemes)
library(choroplethrMaps)
library(RColorBrewer)
library(ggsci)
df <- df_all
data(state.map)
data(county.map)
df$value <- df$killRate #set some variable to 'value' - can reset later
df$region <- as.numeric(substr(df$FIPS, start = 2, stop = 6))
mycolors <- ggsci::pal_d3(palette = "category10")
nColors <- mycolors(n = 4)
nColors <- c("ivory", "cyan", nColors)
df$value <- cut(1e+05 * df$killRate, breaks = c(-1, 0, 1, 10, 25, 50, 100),
labels = c("None", "Less than 1", "1-10", "10-25", "25-50", "> 50"))
# Map
choro = CountyChoropleth$new(df[which(!is.na(df$value)), ])
choro$ggplot_scale = scale_fill_manual(values = nColors, labels = c("None",
"Less than 1", "1-25", "25-50", "> 50"), na.value = "ivory")
newdf <- choro$render()
newdf$data$value[which(is.na(newdf$data$value))] <- "None"
sum_df <- newdf$data[] %>% tibble::as_tibble() %>% group_by(region) %>% dplyr::summarise(name = first(NAME),
long_av = mean(long), lat_av = mean(lat))
df <- dplyr::left_join(df, sum_df)
df$rateLabel <- round(1e+05 * df$killRate)
newdf + theme_minimal() + guides(fill = guide_legend(title = "Civilians Killed per 100,000")) +
scale_fill_manual(values = nColors, breaks = c("Less than 1", "1-10", "10-25",
"25-50", "> 50")) +
ggrepel::geom_label_repel(data = dplyr::filter(df, 1e+05 * killRate > 10, pop.c.total_5yr >
10000), aes(label = paste0(name, " County", "\n", rateLabel, " per 100k",
"\n(n=", killed, ", pop. ", round(pop.c.total_5yr), ")"), x = long_av, y = lat_av,
group = region), min.segment.length = unit(0.1, "lines"), size = 3, nudge_x = 0,
nudge_y = -0.5, fontface = "bold", lineheight = 0.85, fill = "white", alpha = 0.9) +
theme(legend.position = c(0.9, 0.2), legend.title = element_text(face = "bold"),
text = element_text(face = "bold"), axis.text.x = element_blank(), axis.text.y = element_blank()) +
xlab("") + ylab("") + ggtitle(label = "Killed by Police Rates (2013-2016) in Counties with at least 10,000 Residents",
subtitle = "Data from Lott & Moody (2013-2015), Washington Post (2016), and US Census 5-year county population averages (2011-2015).\nSource: johnbradford.github.io")