Duration: ~30 Minutes
5 Minute Break
R is ludicrously powerful and complex,
with many ways to do the same thing.
Today's goal - learn different ways to do the same things.
The tidyverse is a set of packages that restructure how R code is written.
Includes:
%>%
Creator Hadley Wickham
Documentation on
CRAN Page
Base R
foo_foo <- little_bunny()
foo_foo <- hop(foo_foo,
through = forest)
foo_foo <- scoop(foo_foo,
up = field_mice)
foo_foo <- bop(foo_foo,
on = head)
Tidyverse
foo_foo <- little_bunny() %>%
hop(through = forest) %>%
scoop(up = field_mice) %>%
bop(on = head)
Example from R for Data Science
To select rows based on some condition, use filter()
.
Base R
data(iris)
my_iris <-
iris[iris$Species == "setosa" &
iris$Petal.Width < 0.5, ]
Tidyverse
data(iris)
my_iris <- iris %>%
filter(
Species == "setosa",
Petal.Width < 0.5
)
To choose specific columns, use
select()
.
To choose all columns except x
,
use select(-x)
.
Base R
my_iris <-
iris[, c("Species",
"Sepal.Length",
"Sepal.Width")]
Tidyverse
my_iris <- iris %>%
select(
Species,
Sepal.Length,
Sepal.Width
)
To sort by one or more columns, use
arrange()
.
Base R
my_iris <-
iris[order(iris$Sepal.Width,
-iris$Sepal.Length), ]
Tidyverse
my_iris <- iris %>%
arrange(
Sepal.Width,
desc(Sepal.Length)
)
To rename a column, use rename()
.
Base R
my_iris <- iris
names(my_iris)[1:2] <-
c("slength", "swidth")
Tidyverse
my_iris <- iris %>%
rename(
swidth = Sepal.Width,
slength = Sepal.Length
)
To create a new variable, use mutate()
.
Base R
data(iris)
my_iris <- iris
my_iris$Sepal.Ratio <-
my_iris$Sepal.Length /
my_iris$Sepal.Width
my_iris$Petal.Ratio <-
my_iris$Petal.Length /
my_iris$Petal.Width
Tidyverse
data(iris)
my_iris <- iris %>%
mutate(
Sepal.Ratio = Sepal.Length /
Sepal.Width,
Petal.Ratio = Petal.Length /
Petal.Width
)
filter()
Filter only versicolor irises using pipes.
my_iris <- iris %>%
filter(Species == "versicolor")
To check your code, try head()
or summary()
Versicolor Iris
mutate()
In your subset, create Ratio.Length
as a
ratio of Petal.Length
and
Sepal.Length
using pipes.
my_iris <- my_iris %>%
mutate(
Ratio.Length = Petal.Length /
Sepal.Length
)
Diagram of petal and sepal dimensions
select()
Select only length variables from your subset using pipes.
my_iris <- my_iris %>%
select(Petal.Length,
Sepal.Length,
Ratio.Length)
Setosa Iris
arrange()
Arrange your subset by Ratio.Length
using pipes.
my_iris <- my_iris %>% arrange(Ratio.Length)
Now arrange by Ratio.Length
in descending order.
my_iris <- my_iris %>% arrange(desc(Ratio.Length))
rename()
In your subset, rename Petal.Length
to
petallength
using pipes.
my_iris <- my_iris %>%
rename(petallength = Petal.Length)
Virginica Iris
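Now chain together the steps in the previous slides:
1. Filter only versicolor irises.
2. Create Ratio.Length from Petal.Length and Sepal.Length.
3. Select only the length variables.
4. Arrange by Ratio.Length in descending order.
5. Rename Petal.Length to petallength.
Code is on the next slide.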
my_iris <- iris %>%
filter(Species == "versicolor") %>%
mutate(Ratio.Length = Petal.Length / Sepal.Length) %>%
select(Petal.Length, Sepal.Length, Ratio.Length) %>%
arrange(desc(Ratio.Length)) %>%
rename(petallength = Petal.Length)
Next up: tidyverse practice
But first: 5 minute break