dplyr Cheatsheet

See the dplyr cheatsheet here. Remark: the dplyr and MASS packages conflict with each other. In particular, both export a function named select, so the package attached last masks the other one, which can cause problems. Remember to check your environment before running the program! Another solution is to prefix functions from the dplyr package with dplyr:: (for example, dplyr::select). This is especially helpful when you are coding in a big team where people use different packages and environments.

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

## Load the built-in iris data and collect its distinct species
data("iris")
species <- unique(iris$Species)
## Manually created ID mapping: one integer ID per distinct species.
## seq_along() is used instead of 1:length() so that an empty `species`
## yields a zero-row table rather than the sequence c(1, 0).
map <- data.frame(ID = seq_along(species), Species = species)
map

##   ID    Species
## 1  1     setosa
## 2  2 versicolor
## 3  3  virginica

## Manually introduce missing data: blank out Sepal.Width (column 2)
## in the first three rows
iris[seq_len(3), "Sepal.Width"] <- NA
## A peek at the current dataset
head(iris)

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1          NA          1.4         0.2  setosa
## 2          4.9          NA          1.4         0.2  setosa
## 3          4.7          NA          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

##### ROWS
df <- iris %>%
  ## Replace NA with 0 in the numeric columns only. The factor column
  ## Species is left untouched: assigning 0 into a factor raises an
  ## "invalid factor level, NA generated" warning.
  mutate(across(where(is.numeric), ~ replace(.x, is.na(.x), 0))) %>%
  ## Select rows without NA in Sepal.Width. After the replacement above no
  ## NA remains, so every row is kept here; the verb is shown for reference.
  filter(!is.na(Sepal.Width)) %>%
  ## Recode entries with a condition. The comparison is done once, on the
  ## numeric values, instead of chaining two replace() calls where the second
  ## one would compare already-converted character strings.
  mutate(Sepal.Length = if_else(Sepal.Length > 5, "large", "small")) %>%
  mutate(Species = gsub(" ", "", Species)) %>% ## Remove spaces
  mutate(Species = tolower(Species)) ## toupper: uppercase, tolower: lowercase

head(df)

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1        large         0.0          1.4         0.2  setosa
## 2        small         0.0          1.4         0.2  setosa
## 3        small         0.0          1.3         0.2  setosa
## 4        small         3.1          1.5         0.2  setosa
## 5        small         3.6          1.4         0.2  setosa
## 6        large         3.9          1.7         0.4  setosa

#### COLUMNS
df2 <- iris %>%
  ## Right join: keep every row of `map` (the right-hand table), matched
  ## on Species
  right_join(map, by = "Species") %>%
  ## Keep only the listed columns
  select(Sepal.Length, Petal.Width, ID) %>%
  ## Drop a column by negating its name
  select(-Petal.Width)
head(df2)

##   Sepal.Length ID
## 1          5.1  1
## 2          4.9  1
## 3          4.7  1
## 4          4.6  1
## 5          5.0  1
## 6          5.4  1