Creating Maps in R
This script outlines the steps that one would need to take to make their first map in R.
1. Load the libraries required
#install.packages("tidyverse")
library(tidyverse) # A family of packages used to clean, process, model, and visualize data
# install.packages("sf")
library(sf) # Offers Support for simple features, a standardized way to encode spatial vector data. Binds to 'GDAL' for reading and writing data, to 'GEOS' for geometrical operations, and to 'PROJ' for projection conversions and datum transformations.
# install.packages("devtools")
# devtools::install_github("Shelmith-Kariuki/rKenyaCensus")
library(rKenyaCensus) # Contains the 2019 Kenya Census data
# install.packages("ggplot2")
library(ggplot2) # Used for creating amazing pretty graphs in R
# install.packages("tmap") #Thematic maps are geographical maps in which spatial data distributions are visualized
library(tmap)
# install.packages("leaflet") # Used for creating interactive maps
library(leaflet)
2. Read in the Kenya boundaries shapefiles
### Method 1
# KenyaSHP <- st_read("../Documents/PersonalDevelopment/GIS/Kenya counties/Kenya_counties.shp", quiet = TRUE, stringsAsFactors = FALSE, as_tibble = TRUE)
### Method 2
KenyaSHP <- read_sf("Kenya counties/Kenya_counties.shp", quiet = TRUE, stringsAsFactors = FALSE,as_tibble = TRUE)
### To easily view the shapefile in RStudio View pane, you can drop the geometry column and view the rest of the data.
### View(KenyaSHP %>% st_drop_geometry())
2.1 Inspect a few rows
print(KenyaSHP[6:9], n = 3)
#> Simple feature collection with 47 features and 3 fields
#> geometry type: MULTIPOLYGON
#> dimension: XY
#> bbox: xmin: 33.91182 ymin: -4.702271 xmax: 41.90626 ymax: 5.430648
#> geographic CRS: WGS 84
#> # A tibble: 47 x 4
#> COUNTY Shape_Leng Shape_Area geometry
#> <chr> <dbl> <dbl> <MULTIPOLYGON [°]>
#> 1 Turkana 15.0 5.68 (((35.79593 5.344486, 35.79659 5.344676, 35.797…
#> 2 Marsab… 12.0 6.18 (((36.05061 4.456218, 36.23184 4.451243, 36.245…
#> 3 Mandera 7.36 2.12 (((41.62133 3.976735, 41.62272 3.978599, 41.623…
#> # … with 44 more rows
2.2 Inspect the column names
colnames(KenyaSHP)
#> [1] "OBJECTID" "AREA" "PERIMETER" "COUNTY3_" "COUNTY3_ID"
#> [6] "COUNTY" "Shape_Leng" "Shape_Area" "geometry"
#names(KenyaSHP)
2.3 Inspect the class of the shapefile
class(KenyaSHP)
#> [1] "sf" "tbl_df" "tbl" "data.frame"
2.4 Look at the variable data types
glimpse(KenyaSHP)
#> Rows: 47
#> Columns: 9
#> $ OBJECTID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
#> $ AREA <dbl> 5.677, 6.177, 2.117, 4.610, 0.740, 1.713, 2.060, 0.877, 0.…
#> $ PERIMETER <dbl> 15.047, 11.974, 7.355, 9.838, 5.030, 8.311, 10.181, 5.964,…
#> $ COUNTY3_ <dbl> 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18…
#> $ COUNTY3_ID <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
#> $ COUNTY <chr> "Turkana", "Marsabit", "Mandera", "Wajir", "West Pokot", "…
#> $ Shape_Leng <dbl> 15.046838, 11.974165, 7.355154, 9.838408, 5.030271, 8.3110…
#> $ Shape_Area <dbl> 5.67698507, 6.17683074, 2.11719607, 4.60958923, 0.74048058…
#> $ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((35.79593 5...., MULTIPOLYGON …
2.5 View the geometry column
KenyaSHP_geometry <- st_geometry(KenyaSHP)
### View one geometry entry
KenyaSHP_geometry[[1]]
2.6 Geometry columns have their own class
class(KenyaSHP_geometry) #sfc, the list-column with the geometries for each feature
#> [1] "sfc_MULTIPOLYGON" "sfc"
class(KenyaSHP_geometry[[1]]) #sfg, the feature geometry of an individual simple feature
#> [1] "XY" "MULTIPOLYGON" "sfg"
3. Change the projection of the shapefiles
### This line is not necessary since the shapefile is already in the WGS 84 projection.
KenyaSHP <- st_transform(KenyaSHP, crs = 4326)
### Inspect the co-ordinate reference system
st_crs(KenyaSHP)
#> Coordinate Reference System:
#> User input: WGS 84
#> wkt:
#> GEOGCRS["WGS 84",
#> DATUM["World Geodetic System 1984",
#> ELLIPSOID["WGS 84",6378137,298.257223563,
#> LENGTHUNIT["metre",1]]],
#> PRIMEM["Greenwich",0,
#> ANGLEUNIT["degree",0.0174532925199433]],
#> CS[ellipsoidal,2],
#> AXIS["latitude",north,
#> ORDER[1],
#> ANGLEUNIT["degree",0.0174532925199433]],
#> AXIS["longitude",east,
#> ORDER[2],
#> ANGLEUNIT["degree",0.0174532925199433]],
#> ID["EPSG",4326]]
4. Load the data that we are going to map
We will use V4_T2.27: Distribution of Persons with Disability by Type of Disability, Sex, Area of Residence, County and Sub County
Disability_df <- V4_T2.27
5. Clean the data, so that the counties match those in the shapefile.
### Inspect the county names of the disability data
counties_Disability_df <- unique(Disability_df$County)
### Inspect the county names of the shape file
counties_KenyaSHP <- KenyaSHP %>%
st_drop_geometry() %>%
select(COUNTY) %>%
pull() %>%
unique()
### Convert the disability county names to title case
Disability_df <- Disability_df %>%
ungroup() %>%
mutate(County = tools::toTitleCase(tolower(County)))
### Inspect the county names of the disability data again
counties_Disability_df <- unique(Disability_df$County)
### Inspect the county names that are different in each of the datasets
unique(Disability_df$County)[which(!unique(Disability_df$County) %in% counties_KenyaSHP)]
#> [1] "Xxx" "Taita/Taveta" "Tharaka-Nithi" "Elgeyo/Marakwet"
#> [5] "Nairobi City"
### Clean the county names so that they match in both datasets
Disability_df <- Disability_df %>%
mutate(County = ifelse(County == "Taita/Taveta", "Taita Taveta",
ifelse(County == "Tharaka-Nithi", "Tharaka",
ifelse(County == "Elgeyo/Marakwet", "Keiyo-Marakwet",
ifelse(County == "Nairobi City", "Nairobi", County)))))
### Inspect the county names again to ensure that they now match.
unique(Disability_df$County)[which(!unique(Disability_df$County) %in% counties_KenyaSHP)]
#> [1] "Xxx"
6. Carry out some transformations on the data
Disability_df2 <- Disability_df %>%
filter(AdminArea == "County") %>%
select(-AdminArea, -SubCounty)
### unique(Disability_df2$County)[which(!unique(Disability_df2$County) %in% counties_KenyaSHP)]
### counties_KenyaSHP[which(!counties_KenyaSHP %in% unique(Disability_df2$County))]
7. Join the shapefile and the data
### Rename the COUNTY variable, to match the variable name in the shapefile data
Disability_df2 <- Disability_df2 %>%
rename(COUNTY = County)
### Ensure that there are no leading or trailing spaces in the county variable
KenyaSHP$COUNTY <- trimws(KenyaSHP$COUNTY)
Disability_df2$COUNTY <- trimws(Disability_df2$COUNTY)
### Merge the data
merged_df <- left_join(KenyaSHP, Disability_df2, by = "COUNTY")
### Sort the data so that the County variable appears first
merged_df <- merged_df %>%
select(COUNTY, everything())
8. Inspect the merged data
### View the data
# View(merged_df)
# View(merged_df %>% st_drop_geometry())
### Class of the merged data
class(merged_df)
#> [1] "sf" "tbl_df" "tbl" "data.frame"
### Column names
colnames(merged_df)
#> [1] "COUNTY" "OBJECTID" "AREA"
#> [4] "PERIMETER" "COUNTY3_" "COUNTY3_ID"
#> [7] "Shape_Leng" "Shape_Area" "geometry"
#> [10] "Vision_Total" "Vision_Male" "Vision_Female"
#> [13] "Hearing_Total" "Hearing_Male" "Hearing_Female"
#> [16] "Mobility_Total" "Mobility_Male" "Mobility_Female"
#> [19] "Cognition_Total" "Cognition_Male" "Cognition_Female"
#> [22] "SelfCare_Total" "SelfCare_Male" "SelfCare_Female"
#> [25] "Communication_Total" "Communication_Male" "Communication_Female"
### Variable types
glimpse(merged_df)
#> Rows: 47
#> Columns: 27
#> $ COUNTY <chr> "Turkana", "Marsabit", "Mandera", "Wajir", "West…
#> $ OBJECTID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…
#> $ AREA <dbl> 5.677, 6.177, 2.117, 4.610, 0.740, 1.713, 2.060,…
#> $ PERIMETER <dbl> 15.047, 11.974, 7.355, 9.838, 5.030, 8.311, 10.1…
#> $ COUNTY3_ <dbl> 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, …
#> $ COUNTY3_ID <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…
#> $ Shape_Leng <dbl> 15.046838, 11.974165, 7.355154, 9.838408, 5.0302…
#> $ Shape_Area <dbl> 5.67698507, 6.17683074, 2.11719607, 4.60958923, …
#> $ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((35.79593 5...., MUL…
#> $ Vision_Total <dbl> 2907, 1137, 1736, 830, 1909, 1302, 1040, 2757, 1…
#> $ Vision_Male <dbl> 1308, 516, 894, 447, 862, 511, 402, 1214, 663, 2…
#> $ Vision_Female <dbl> 1599, 621, 842, 383, 1047, 791, 638, 1543, 796, …
#> $ Hearing_Total <dbl> 1817, 725, 1696, 1005, 1527, 908, 566, 1690, 934…
#> $ Hearing_Male <dbl> 903, 336, 855, 543, 733, 416, 265, 794, 469, 127…
#> $ Hearing_Female <dbl> 914, 389, 841, 462, 794, 492, 301, 896, 465, 145…
#> $ Mobility_Total <dbl> 3184, 1216, 2526, 1629, 2271, 1096, 980, 3805, 2…
#> $ Mobility_Male <dbl> 1628, 582, 1282, 877, 1051, 521, 446, 1732, 974,…
#> $ Mobility_Female <dbl> 1556, 634, 1243, 752, 1220, 575, 534, 2073, 1076…
#> $ Cognition_Total <dbl> 2071, 621, 2187, 1270, 1269, 612, 509, 1854, 973…
#> $ Cognition_Male <dbl> 1093, 303, 1117, 664, 593, 301, 248, 851, 445, 1…
#> $ Cognition_Female <dbl> 978, 318, 1069, 606, 676, 311, 261, 1003, 528, 2…
#> $ SelfCare_Total <dbl> 2230, 619, 2404, 1392, 1249, 680, 498, 1799, 980…
#> $ SelfCare_Male <dbl> 1163, 299, 1219, 751, 587, 318, 234, 863, 486, 1…
#> $ SelfCare_Female <dbl> 1067, 320, 1184, 641, 662, 362, 264, 936, 494, 1…
#> $ Communication_Total <dbl> 1429, 451, 1486, 760, 1030, 448, 358, 1217, 655,…
#> $ Communication_Male <dbl> 777, 230, 770, 422, 524, 234, 200, 639, 372, 139…
#> $ Communication_Female <dbl> 652, 221, 715, 338, 506, 214, 158, 577, 283, 112…
9. Visualise the data
9.1 plot()
We are going to plot a base plot / map.
plot(KenyaSHP$geometry, lty = 3, col = "darkgreen")
9.2 ggplot2()
map1 <- ggplot(data = merged_df)+
geom_sf(aes(geometry = geometry, fill = Vision_Total))+
theme_void()+
labs(title = "Distribution of Population with Vision Disability",
caption = "By: @Shel_Kariuki")+
theme(plot.title = element_text(family = "URW Palladio L, Italic",size = 16, hjust = 0.5),
legend.title = element_blank(),
plot.caption = element_text(family = "URW Palladio L, Italic",size = 12))+
#scale_fill_gradient(low = "darkgreen", high = "red")
scale_fill_viridis_c()
map1
9.3 tmap()
tmap_mode("plot") #Set tmap mode to static plotting or interactive viewing
map2 <- tm_shape(merged_df) +
tm_fill("Vision_Total",palette="Greens",
title="Distribution of Population with Vision Disability",
id = "COUNTY") +
tm_borders(col = "red",lty = 3)+
tm_layout(legend.position = c("left", "bottom"))
map2
9.4 leaflet()
### Specify the color scheme
pal <- colorBin(
palette = "YlOrRd",
domain = merged_df$Vision_Total
)
### Specify how labels will be displayed
labels <- sprintf(
"<strong>%s</strong><br/>%g",
merged_df$COUNTY, merged_df$Vision_Total
) %>% lapply(htmltools::HTML)
### Generate the graph
leaflet(merged_df) %>%
addTiles() %>%
addPolygons(color = "red", weight = 1, dashArray = "3", fillColor = ~pal(Vision_Total),
highlight = highlightOptions(
weight = 4,
color = "red",
dashArray = "",
bringToFront = TRUE),
label = labels,
labelOptions = labelOptions(
style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "15px",
direction = "auto")) %>%
addLegend(position = c("bottomright"),pal = pal, values = ~Vision_Total)
We are done!!