Loading required package: sp
# The code is written and tested on a x86_64-pc-linux-gnu (64-bit) machine
# R version 3.3.2
# load required packages
library(tidyverse)      # version 1.0.0
library(viridis)        # version 0.3.4
library(ggthemes)       # version 3.2.0
library(magrittr)       # version 1.5
library(stringr)        # version 1.1.0
library(rgdal)          # version 1.2-4
library(rgeos)          # version 0.3-21
library(maptools)       # version 0.8-40
# create sub-directories
ifelse(!dir.exists('data'),dir.create('data'),paste("Directory already exists"))
[1] TRUE
ifelse(!dir.exists('geodata'),dir.create('geodata'),paste("Directory already exists"))
[1] TRUE

1. My visualization

As a showcase of my R skills I decided to grab (and modify a bit) some lines of code that I produced for a blog post (it’s in Russian), in which I did some re-analysis of a paper recently published in Human Nature 1. The code to replicate all results in the post can be found in this gist.

The selected code will do the following:
1. download unemployment data to be visualized;
2. download geodata for the US counties;
3. transform and prepare geodata;
4. produce beautiful maps.

Download unemployment data

url <- "https://www.ers.usda.gov/webdocs/DataFiles/CountyLevel_Data_Sets_Download_Data__18026//Unemployment.xls"
download.file(url = url, destfile = 'data/us_unemp.xls', mode="wb")
trying URL 'https://www.ers.usda.gov/webdocs/DataFiles/CountyLevel_Data_Sets_Download_Data__18026//Unemployment.xls'
Content type 'application/vnd.ms-excel' length 1357312 bytes (1.3 MB)
==================================================
downloaded 1.3 MB
readxl::excel_sheets(path = 'data/us_unemp.xls')
[1] "Unemployment Update"   "Variable Descriptions"
df_us <- readxl::read_excel(path = 'data/us_unemp.xls', sheet = "Unemployment Update", skip = 6)

Let us clean the dataset a bit.

names(df_us) %<>% tolower()
df_us %<>% select(1:6, contains('rate')) 

Download geodata

The solution picked up from an SO answer

f <- tempfile()
download.file("http://www2.census.gov/geo/tiger/GENZ2010/gz_2010_us_050_00_20m.zip", destfile = f)
trying URL 'http://www2.census.gov/geo/tiger/GENZ2010/gz_2010_us_050_00_20m.zip'
Content type 'application/zip' length 1307976 bytes (1.2 MB)
==================================================
downloaded 1.2 MB
unzip(f, exdir = "geodata/.")
US <- readOGR("geodata/.", "gz_2010_us_050_00_20m")
OGR data source with driver: ESRI Shapefile 
Source: "geodata/.", layer: "gz_2010_us_050_00_20m"
with 3221 features
It has 6 fields

Reproject geodata

US_prj <- spTransform(US, CRS('+init=epsg:2163'))
names(US_prj@data) %<>%  str_to_lower()
US_prj@data %<>% mutate(id = str_sub(geo_id,10,14))
row.names(US_prj) <- US_prj$id

Transform Alaska and Hawaii to fit in the map. The solution fond in an RPubs document

alaska <-  US_prj[US_prj$state=="02",]
alaska <- elide(alaska, rotate=-36)
alaska <- elide(alaska, scale=max(apply(bbox(alaska), 1, diff)) / 2.5)
alaska <-  elide(alaska, shift=c(-2500000, -2200000))
proj4string(alaska) <- proj4string(US_prj)
hawaii <- US_prj[US_prj$state=="15",]
hawaii <- elide(hawaii, rotate=-35)
hawaii <- elide(hawaii, shift=c(5100000, -1300000))
proj4string(hawaii) <- proj4string(US_prj)
US_prj <- US_prj[!US_prj$state %in% c("02","15","72"),]
US_prj <- rbind(US_prj, alaska, hawaii)
# fortify
gd_county <- fortify(US_prj, region = 'id')

Now do the same for the states shapefile.

f <- tempfile()
download.file("http://www2.census.gov/geo/tiger/GENZ2010/gz_2010_us_040_00_20m.zip", destfile = f)
trying URL 'http://www2.census.gov/geo/tiger/GENZ2010/gz_2010_us_040_00_20m.zip'
Content type 'application/zip' length 648959 bytes (633 KB)
==================================================
downloaded 633 KB
unzip(f, exdir = "geodata/.")
US_st <- readOGR("geodata/.", "gz_2010_us_040_00_20m")
OGR data source with driver: ESRI Shapefile 
Source: "geodata/.", layer: "gz_2010_us_040_00_20m"
with 52 features
It has 5 fields
# reproject geodata
US_st_prj <- spTransform(US_st, CRS('+init=epsg:2163'))
names(US_st_prj@data) <- str_to_lower(names(US_st_prj@data))
row.names(US_st_prj) <- paste(US_st_prj$state)
alaska_st <-  US_st_prj[US_st_prj$state=="02",]
alaska_st <- elide(alaska_st, rotate=-36)
alaska_st <- elide(alaska_st, scale=max(apply(bbox(alaska_st), 1, diff)) / 2.5)
alaska_st <-  elide(alaska_st, shift=c(-2500000, -2200000))
proj4string(alaska_st) <- proj4string(US_st_prj)
hawaii_st <- US_st_prj[US_st_prj$state=="15",]
hawaii_st <- elide(hawaii_st, rotate=-35)
hawaii_st <- elide(hawaii_st, shift=c(5100000, -1300000))
proj4string(hawaii_st) <- proj4string(US_st_prj)
US_st_prj <- US_st_prj[!US_st_prj$state %in% c("02","15","72"),]
US_st_prj <- rbind(US_st_prj, alaska_st, hawaii_st)
# fortify
gd_state <- fortify(US_st_prj, region = 'state')

To plot a nice map at county level, we need to identify states’ borders as a ploy-line object from the polygon object. To do this I wrote a special function using a solution from an SO answer.

identify_borders <- function(SPolyDF){
        require(rgeos)
        require(sp)
        borders <- gDifference(
                as(SPolyDF,"SpatialLines"),
                as(gUnaryUnion(SPolyDF),"SpatialLines"),
                byid=TRUE)
        df <- data.frame(len = sapply(1:length(borders), function(i) gLength(borders[i, ])))
        rownames(df) <- sapply(1:length(borders), function(i) borders@lines[[i]]@ID)
        SLDF <- SpatialLinesDataFrame(borders, data = df)
        return(SLDF)
}
US_st_borders <- identify_borders(US_st_prj)
gd_state_borders <- fortify(US_st_borders)

Finally, it would be nice to plot major cities.

# major cities
f <- tempfile()
download.file("https://prd-tnm.s3.amazonaws.com/StagedProducts/Small-scale/data/Structures/citiesx020_nt00007.tar.gz", destfile = f)
trying URL 'https://prd-tnm.s3.amazonaws.com/StagedProducts/Small-scale/data/Structures/citiesx020_nt00007.tar.gz'
Content type 'application/x-gzip' length 1186226 bytes (1.1 MB)
==================================================
downloaded 1.1 MB
dir.create('geodata/us_cities')
untar(f, exdir = "geodata/us_cities/.")
cities <- readOGR(dsn = 'geodata/us_cities', layer = 'citiesx020')
OGR data source with driver: ESRI Shapefile 
Source: "geodata/us_cities", layer: "citiesx020"
with 35432 features
It has 11 fields
cities_sub <- cities[cities$FEATURE%in%c("State Capital","State Capital   County Seat") | 
                             cities$POP_RANGE%in%c("1,000,000 - 9,999,999","500,000 - 999,999"),]
cities_sub@data <- cities_sub@data %>% droplevels()
proj4string(cities_sub) <- CRS('+proj=longlat')
cities_prj <- spTransform(cities_sub, CRS('+init=epsg:2163'))
gd_cities <- data.frame(cities_prj) %>%
        transmute(id = CITIESX020, long = coords.x1, lat = coords.x2,
                  name = NAME, fips = FIPS, state = STATE,
                  huge = POP_RANGE%in%c("1,000,000 - 9,999,999","500,000 - 999,999"),
                  capital = FEATURE%in%c("State Capital","State Capital   County Seat"))
# adjust positions for Juneau (AK) and Honolulu (HI)
gd_cities[gd_cities$state=='AK','long'] <- -1270000
gd_cities[gd_cities$state=='AK','lat'] <- -2030000
gd_cities[gd_cities$state=='HI','long'] <- -775000
gd_cities[gd_cities$state=='HI','lat'] <- -1900000
# SAVE GEODATA
save(gd_state_borders, gd_state, gd_county, gd_cities,
     file = 'geodata/us_geodata.RData')

Create templates for maps

basemap_cont <- ggplot()+
        geom_polygon(data = gd_county, aes(x=long, y=lat, group=group), fill='grey50')+
        guides(fill = guide_colorbar(barwidth = 1, barheight = 10))+
        coord_equal(xlim = c(-2100000,3300000),ylim = c(-2400000,800000),expand = c(0,0))+
        theme_map()+
        theme(panel.border=element_rect(color = 'black',size=.5,fill = NA),
              panel.background=element_rect(fill='grey15'),
              legend.position = c(1, 0),
              legend.justification = c(1, 0),
              legend.background = element_rect(colour = NA, fill = 'grey95'),
              legend.title = element_text(size=15),
              legend.text = element_text(size=15))+
        scale_x_continuous(expand=c(0,0)) +
        scale_y_continuous(expand=c(0,0)) +
        labs(x = NULL, y = NULL)
basemap_disc <- ggplot()+
        geom_polygon(data = gd_county, aes(x=long, y=lat, group=group), fill='grey50')+
        coord_equal(xlim = c(-2100000,3300000),ylim = c(-2400000,800000),expand = c(0,0))+
        theme_map()+
        theme(panel.border=element_rect(color = 'black',size=.5,fill = NA),
              panel.background=element_rect(fill='grey15'),
              legend.position = c(1, 0),
              legend.justification = c(1, 0),
              legend.background = element_rect(colour = NA, fill = 'grey95'),
              legend.title = element_text(size=15),
              legend.text = element_text(size=15))+
        scale_x_continuous(expand=c(0,0)) +
        scale_y_continuous(expand=c(0,0)) +
        labs(x = NULL, y = NULL)

Map of unemployment rates in 2015

basemap_cont +
        geom_map(map = gd_county, data = df_us, aes(map_id=fips_code, fill=unemployment_rate_2015))+
        geom_path(data = gd_state_borders, aes(x=long, y=lat, group=group), 
                  color='grey50', size = .5)+
        geom_point(data = gd_cities %>% filter(capital==T, huge==F), 
                   aes(x=long, y=lat), color = 'red', size = 3, pch=1)+
        geom_point(data = gd_cities %>% filter(capital==T, huge==T), 
                   aes(x=long, y=lat), color = 'red', size = 5, pch=1)+
        geom_point(data = gd_cities %>% filter(capital==F, huge==T), 
                   aes(x=long, y=lat), color = 'gold', size = 5, pch=1)+
        scale_fill_gradientn('Unemployment\nrate, %\n', colours = viridis(100), trans = 'log', 
                             breaks = c(2,5,10,20))

NA

Map of urban-rural classification

basemap_disc +
        geom_map(map = gd_county, data = df_us, aes(map_id=fips_code, fill=factor(rural_urban_continuum_code_2013)))+
        geom_path(data = gd_state_borders, aes(x=long, y=lat, group=group), 
                  color='grey50', size = .5)+
        scale_fill_viridis('Urban\nRural\ncounty\nclassification\n', option = 'B', discrete = T, direction = -1)+
        
        geom_point(data = gd_cities %>% filter(capital==T, huge==F), 
                   aes(x=long, y=lat), color = 'red', size = 3, pch=1)+
        geom_point(data = gd_cities %>% filter(capital==T, huge==T), 
                   aes(x=long, y=lat), color = 'red', size = 5, pch=1)+
        geom_point(data = gd_cities %>% filter(capital==F, huge==T), 
                   aes(x=long, y=lat), color = 'purple4', size = 5, pch=1)