See the `dplyr` cheatsheet here.
Remark: There is a conflict between the `dplyr` package and the `MASS` package; the `select` function, in particular, can cause problems because both packages export a function with that name. Remember to check your environment before running the program! Another solution is to add `dplyr::` before the functions from the `dplyr` package (for example, `dplyr::select`). This can be especially helpful when you are coding in a big team where people use different packages and environments.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Load the built-in iris data set into the workspace
data("iris")
## Distinct species, in order of first appearance
species <- unique(iris$Species)
## Manually created ID mapping: one integer ID per species.
## seq_along() is used instead of 1:length() so that an empty `species`
## yields zero IDs rather than the sequence c(1, 0).
map <- data.frame(ID = seq_along(species), Species = species)
map
## ID Species
## 1 1 setosa
## 2 2 versicolor
## 3 3 virginica
## Manually inject missing data: blank out Sepal.Width (column 2) in rows 1-3
iris[1:3, "Sepal.Width"] <- NA
## A peek at the current dataset
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 NA 1.4 0.2 setosa
## 2 4.9 NA 1.4 0.2 setosa
## 3 4.7 NA 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
##### ROWS
df <- iris %>%
  ## Replace NA with 0 in the numeric columns only. Applying replace(., ., 0)
  ## to the factor column Species raises "invalid factor level, NA generated",
  ## because 0 is not one of its levels.
  mutate(across(where(is.numeric), ~ replace(., is.na(.), 0))) %>%
  ## Select rows without NA in Sepal.Width. This is a no-op at this point,
  ## since the NAs were just replaced by 0; it is kept to demonstrate filter().
  filter(!is.na(Sepal.Width)) %>%
  ## Replace entries with a condition. Both labels are assigned in one step
  ## while the column is still numeric, so no comparison is made against
  ## values that have already been turned into strings.
  mutate(Sepal.Length = if_else(Sepal.Length > 5, "large", "small")) %>%
  mutate(Species = gsub(" ", "", Species)) %>% ## Remove spaces
  mutate(Species = tolower(Species)) ## toupper: uppercase, tolower: lowercase
head(df)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 large 0.0 1.4 0.2 setosa
## 2 small 0.0 1.4 0.2 setosa
## 3 small 0.0 1.3 0.2 setosa
## 4 small 3.1 1.5 0.2 setosa
## 5 small 3.6 1.4 0.2 setosa
## 6 large 3.9 1.7 0.4 setosa
#### COLUMNS
## select() is written as dplyr::select() so that it still resolves to the
## dplyr verb when another attached package (e.g. MASS) masks `select`.
df2 <- iris %>%
  ## Join on Species, keeping every row of the right-hand table (map)
  right_join(map, by = "Species") %>%
  dplyr::select(Sepal.Length, Petal.Width, ID) %>% ## Select columns
  dplyr::select(-Petal.Width) ## Delete columns
head(df2)
## Sepal.Length ID
## 1 5.1 1
## 2 4.9 1
## 3 4.7 1
## 4 4.6 1
## 5 5.0 1
## 6 5.4 1