Session 3 of 4: Data visualisation
Task
session3 or data_vis).
Task
Task
head() and at least one other function to check that things look okay.

| Plot type | Predictor variable (x) | Response variable (y) |
|---|---|---|
| Barplot, Boxplot, Violinplot | Categorical | Numeric |
| Scatterplot | Numeric | Numeric |
| Density, Histogram | Numeric | - |
ggplot()
ggplot() is a function from the ggplot2 package (it’s part of the tidyverse collection of packages).
# Two ways to transform data
# Approach 1: transform the data before it reaches ggplot.
# A new column holding the natural log of total_abundance is added with
# mutate(), and that new column is what gets mapped to the y axis.
sizes_byspp %>%
  mutate(log_tot_abun = log(total_abundance)) %>%
  ggplot() +
  aes(x = mean_size, y = log_tot_abun) +
  geom_point()

# Approach 2: leave the data untouched and transform inside the plot.
# The raw total_abundance column is mapped to y, and the y axis itself is
# drawn on a log10 scale.
sizes_byspp %>%
  ggplot() +
  aes(x = mean_size, y = total_abundance) +
  geom_point() +
  scale_y_log10()
# What are the two differences between these two? (think: visual and data)

# Changing the transparency of the point (= alpha)
ggplot(sizes_byspp, aes(x = mean_size, y = total_abundance)) +
  geom_point(alpha = 0.5) + # alpha argument sets how transparent points are
  scale_y_log10()

# Changing the type of point (= pch)
ggplot(sizes_byspp, aes(x = mean_size, y = total_abundance)) +
  geom_point(pch = 21) + # pch argument picks the point type (Google: "pch in r")
  scale_y_log10()
Task
# A barplot draws one summary value per bar, e.g. the mean or the median.
# Question: how do barplots differ from violin plots or boxplots?
#
# Steps: keep only the selected sites, average n_species within each
# site/method combination, then draw one bar per combination.
nspp_bysurv_bysite %>%
  filter(site_code %in% selected_sites) %>%
  summarise(
    mean_diversity = mean(n_species, na.rm = TRUE),
    .by = c(site_code, method)
  ) %>%
  ggplot(
    aes(x = site_code, y = mean_diversity, fill = as.factor(method))
  ) +
  geom_col(position = "dodge") + # bars for each method sit side by side
  labs(x = "Site Code", y = "Mean number of species", fill = "Method")

# Barplots need error bars
# Summarise the mean and standard deviation of n_species for each
# site/method combination, then draw dodged bars with an error bar on each.
nspp_bysurv_bysite %>%
  filter(site_code %in% selected_sites) %>%
  summarise(
    mean_diversity = mean(n_species, na.rm = TRUE),
    sd_diversity = sd(n_species, na.rm = TRUE),
    .by = c(site_code, method)
  ) %>%
  ggplot(
    aes(x = site_code, y = mean_diversity, fill = as.factor(method))
  ) +
  geom_col(position = position_dodge(width = 1)) +
  # Error bars span the mean minus/plus one standard deviation. They use the
  # same dodge width as geom_col() so each one lines up with its own bar.
  geom_errorbar(
    mapping = aes(
      ymin = mean_diversity - sd_diversity,
      ymax = mean_diversity + sd_diversity
    ),
    position = position_dodge(width = 1),
    width = 0.2
  ) +
  labs(x = "Site Code", y = "Mean number of species", fill = "Method")

# Making pretty plots
# Dodged barplot of mean n_species per site/method with standard-deviation
# error bars, polished with a ColorBrewer fill palette and a built-in theme.
nspp_bysurv_bysite %>%
  filter(site_code %in% selected_sites) %>%
  summarise(
    mean_diversity = mean(n_species, na.rm = TRUE),
    sd_diversity = sd(n_species, na.rm = TRUE),
    .by = c(site_code, method)
  ) %>%
  ggplot(
    aes(x = site_code, y = mean_diversity, fill = as.factor(method))
  ) +
  geom_col(position = position_dodge(width = 1)) +
  geom_errorbar(
    mapping = aes(
      ymin = mean_diversity - sd_diversity,
      ymax = mean_diversity + sd_diversity
    ),
    position = position_dodge(width = 1),
    width = 0.2
  ) +
  labs(x = "Site Code", y = "Mean number of species", fill = "Method") +
  # Palette choices: https://r-graph-gallery.com/38-rcolorbrewers-palettes.html
  scale_fill_brewer(palette = "Set1") +
  # Theme choices: https://ggplot2.tidyverse.org/reference/ggtheme.html
  theme_classic()

# Change the order of the geom_line() and geom_point()
# Mean n_species per year and method, drawn as one line per method with
# large points coloured by method. Layers are drawn in the order they are
# added, so here the points sit on top of the lines.
# NOTE(review): year() is presumably lubridate::year() -- confirm that
# lubridate is attached earlier in the session.
nspp_bysurv_bysite %>%
  mutate(year = year(survey_date)) %>%
  summarise(
    mean_diversity = mean(n_species, na.rm = TRUE),
    .by = c(year, method)
  ) %>%
  ggplot(aes(x = year, y = mean_diversity)) +
  geom_line(aes(group = as.factor(method))) +
  geom_point(aes(col = as.factor(method)), size = 5)