Factorizing variables in R

There are five ways (that I know of) to factorize multiple variables in R.

  • One by one (an option for beginners).
  • Using a for loop.
  • lapply()
  • forcats::fct_relevel()
  • purrr::map()

0. Load the packages that we will require


## Packages this walkthrough relies on. forcats is listed explicitly because
## forcats::fct_relevel() is called further down, so it has to be installed too.
pkgs <- c("dplyr", "purrr", "repurrrsive", "forcats")

## Work out which of the required packages are not installed yet.
## rownames(installed.packages()) holds the installed package names.
miss_pkgs <- setdiff(pkgs, rownames(installed.packages()))

## Install only the missing packages
if (length(miss_pkgs) > 0) {
  install.packages(miss_pkgs)
}

## Attach all the packages; invisible() hides the list that lapply() returns
invisible(lapply(pkgs, library, character.only = TRUE))

## Remove the helper objects that are no longer required
rm(miss_pkgs, pkgs)

Assume you have a matrix-type question, where respondents were asked to rate each statement on a scale of 1 to 5 (1 represents Strongly Disagree and 5 represents Strongly Agree). The code below simulates 200 responses to five such statements.

## Fix the random seed so the simulated answers are reproducible
set.seed(2020)

vec <- 1:5
Questions <- paste0("Opinion_", vec)

## Start from an empty data frame: 200 rows, one column per question
opinion_df <- data.frame(matrix("", ncol = length(Questions), nrow = 200))
names(opinion_df) <- Questions

## Fill each question with 200 answers drawn at random, with replacement.
## seq_along() is used instead of 1:length() so that the loop body is skipped,
## rather than run over c(1, 0), if Questions is ever empty.
for (i in seq_along(Questions)) {
  opinion_df[[Questions[i]]] <- sample(
    c("Strongly Agree", "Agree", "Neutral", "Disagree", "Strongly Disagree"),
    size = 200,
    replace = TRUE
  )
}

head(opinion_df, 10)
##            Opinion_1         Opinion_2         Opinion_3         Opinion_4
## 1           Disagree          Disagree             Agree          Disagree
## 2           Disagree             Agree             Agree Strongly Disagree
## 3     Strongly Agree           Neutral    Strongly Agree           Neutral
## 4     Strongly Agree Strongly Disagree             Agree           Neutral
## 5           Disagree           Neutral             Agree    Strongly Agree
## 6              Agree           Neutral           Neutral           Neutral
## 7     Strongly Agree           Neutral Strongly Disagree    Strongly Agree
## 8  Strongly Disagree           Neutral           Neutral    Strongly Agree
## 9              Agree          Disagree    Strongly Agree             Agree
## 10             Agree          Disagree             Agree           Neutral
##            Opinion_5
## 1              Agree
## 2           Disagree
## 3            Neutral
## 4            Neutral
## 5           Disagree
## 6           Disagree
## 7  Strongly Disagree
## 8            Neutral
## 9              Agree
## 10          Disagree

1. Factorizing one by one


## Work on a copy so the raw answers in opinion_df stay untouched
opinion_df2 <- opinion_df

## Opinion_1: turn the answers into a factor whose levels run from
## "Strongly Disagree" up to "Strongly Agree"
opinion_df2[["Opinion_1"]] <- factor(
  opinion_df2[["Opinion_1"]],
  levels = c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"),
  labels = c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree")
)

## Opinion_2: exactly the same conversion, repeated by hand
opinion_df2[["Opinion_2"]] <- factor(
  opinion_df2[["Opinion_2"]],
  levels = c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"),
  labels = c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree")
)

## Sorry, I cannot continue with this replication

2. Using a for loop


opinion_df3 <- opinion_df

## Convert one column of survey answers into a factor.
##
## data: a data frame (or any list-like object) holding the answers.
## var:  the name of the column to convert, as a single string.
##
## Returns the column as a factor whose levels run from "Strongly Disagree"
## to "Strongly Agree". Values outside these five answers become NA.
factor_function <- function(data, var) {
  # [[ ]] always extracts the column itself; data[, var] would return a
  # one-column tibble for tibbles and fails outright on plain lists.
  # labels is omitted because it defaults to the levels.
  factor(
    data[[var]],
    levels = c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree")
  )
}

## Convert every question column in turn and print its levels as a check.
## Iterating over the names directly avoids 1:length(), which would run over
## c(1, 0) if Questions were empty.
for (question in Questions) {
  opinion_df3[[question]] <- factor_function(opinion_df3, question)
  print(levels(opinion_df3[[question]]))
}
## [1] "Strongly Disagree" "Disagree"          "Neutral"          
## [4] "Agree"             "Strongly Agree"   
## [1] "Strongly Disagree" "Disagree"          "Neutral"          
## [4] "Agree"             "Strongly Agree"   
## [1] "Strongly Disagree" "Disagree"          "Neutral"          
## [4] "Agree"             "Strongly Agree"   
## [1] "Strongly Disagree" "Disagree"          "Neutral"          
## [4] "Agree"             "Strongly Agree"   
## [1] "Strongly Disagree" "Disagree"          "Neutral"          
## [4] "Agree"             "Strongly Agree"

3. Using lapply().

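sapply() can be moody, and this was one of those days: it tries to simplify its result into a vector or matrix, which throws away the factor class. lapply() always returns a list, so I used that instead, and it works.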

opinion_df <- data.frame(opinion_df)

## lapply() returns a list with one factor per column; as.data.frame() puts
## the columns back together. check.names = FALSE keeps the column names
## exactly as they are. labels is omitted because it defaults to the levels.
opinion_df4 <- as.data.frame(
  lapply(opinion_df, function(x) {
    factor(
      x,
      levels = c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree")
    )
  }),
  check.names = FALSE
)

## Check the data frame built in this section (opinion_df4)
levels(opinion_df4$Opinion_4)
## [1] "Strongly Disagree" "Disagree"          "Neutral"          
## [4] "Agree"             "Strongly Agree"

4. Using forcats::fct_relevel()


## Re-level every column with forcats::fct_relevel(). mutate_all() is
## superseded in dplyr; across(everything(), ...) is the current equivalent.
opinion_df5 <- opinion_df %>%
  dplyr::mutate(
    dplyr::across(
      dplyr::everything(),
      ~ forcats::fct_relevel(
        .x,
        "Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"
      )
    )
  )

levels(opinion_df5$Opinion_4)
## [1] "Strongly Disagree" "Disagree"          "Neutral"          
## [4] "Agree"             "Strongly Agree"

5. Using purrr::map()


The syntax is map(.x, .f), i.e. for each element of .x, apply the function .f.

## map() converts each column to a factor and returns a named list;
## bind_rows() then assembles that list into a single data frame.
opinion_df6 <- opinion_df %>%
  map(function(answers) {
    factor(
      answers,
      levels = c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"),
      labels = c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree")
    )
  }) %>%
  bind_rows()


levels(opinion_df6$Opinion_4)
## [1] "Strongly Disagree" "Disagree"          "Neutral"          
## [4] "Agree"             "Strongly Agree"
Senior Data Analyst

Related