Duration: ~20 Minutes
summarize()
Base R has many ways to summarize data.
With summarize(), you get Tidyverse code that is sometimes longer,
but always clearer.
Base R
# Column means of the first two columns, selected by position.
colMeans(iris[1:2])
# or more explicitly, selecting the same two columns by name
colMeans(iris[c("Sepal.Length", "Sepal.Width")])
# Sepal.Length  Sepal.Width
#     5.843333     3.057333
Tidyverse
# Column means with summarize(): one named output column per statistic.
iris %>%
  summarize(
    mean_slength = mean(Sepal.Length),
    mean_swidth = mean(Sepal.Width)
  )
#   mean_slength mean_swidth
# 1     5.843333    3.057333
Base R
# Mean petal length per species in base R: subset the rows of each species
# by hand, then collect the three means into a one-column data frame whose
# row names are the species.
data.frame(
  mean_petal_length = c(
    setosa = mean(iris[iris$Species == "setosa", "Petal.Length"]),
    versicolor = mean(iris[iris$Species == "versicolor", "Petal.Length"]),
    virginica = mean(iris[iris$Species == "virginica", "Petal.Length"])
  )
)
#            mean_petal_length
# setosa                 1.462
# versicolor             4.260
# virginica              5.552
Tidyverse
# Grouped summary: one mean per species. The trailing ungroup() makes sure
# no grouping is left on the result.
iris %>%
  group_by(Species) %>%
  summarize(mean_petal_length = mean(Petal.Length)) %>%
  ungroup()
#   Species    mean_petal_length
# 1 setosa                  1.46
# 2 versicolor              4.26
# 3 virginica               5.55
summarize()
Calculate the sums of Petal.Length and Petal.Width.
Save your results to a dataset.
# Exercise solution: total petal length and width over the whole dataset,
# saved as a one-row data frame.
my_iris <- iris %>%
  summarize(
    sum_petal_length = sum(Petal.Length),
    sum_petal_width = sum(Petal.Width)
  )
group_by()
Calculate the sums of Petal.Length and Petal.Width, grouped by Species.
Save your results to a dataset.
# Exercise solution: total petal length and width per species, saved with
# the grouping removed.
my_iris <- iris %>%
  group_by(Species) %>%
  summarize(
    sum_petal_length = sum(Petal.Length),
    sum_petal_width = sum(Petal.Width)
  ) %>%
  ungroup()
Summarize across rows instead of columns.
Base R
# Mean of the four numeric measurements for each row (flower).
rowMeans(iris[1:4])
# [1] 2.550 2.375 2.350 2.350
# [5] 2.550 2.850 2.425 2.525
# [9] 2.225 2.400 ...
Tidyverse
# Keep the four measurement columns (a contiguous range in iris), then
# average across each row.
iris %>%
  select(Sepal.Length:Petal.Width) %>%
  rowMeans()
But what if we are calculating something more complex than means?
rowwise()
In the code below, replace mean() with your function of choice.
# Row-by-row calculation: rowwise() makes mean() see one row at a time, so
# each row gets the mean of its own four measurements.
iris %>%
  rowwise() %>%
  mutate(
    mean = mean(c(Sepal.Length, Sepal.Width, Petal.Length, Petal.Width))
  ) %>%
  # Drop the row-wise grouping so later verbs work on whole columns again,
  # matching the group_by() ... ungroup() pattern used elsewhere.
  ungroup()
What happens if you leave out rowwise()?
Now we are switching to the built-in cars dataset.
## Easy, but limited
# Row count and column means combined into one named numeric vector.
c(
  n_rows = nrow(cars),
  colMeans(cars)
)
# n_rows  speed   dist
#  50.00  15.40  42.98
## More complex, more powerful
# Same summary with summarize(): n() counts the rows, and each statistic is
# its own named column, so any other summary function can be swapped in.
cars %>%
  summarize(
    n_rows = n(),
    speed = mean(speed),
    dist = mean(dist)
  )
#   n_rows speed  dist
# 1     50  15.4 42.98
Next up: Summarize practice