Session 4 of 4: Combining learned skills
aes() function in ggplot()?
Using R markdown (.Rmd)
as.numeric(), as.character(), as.factor() (look out for errors when converting)
%in% is like == but for multiple comparisons
case_when() is like ifelse() but more flexible
stat_smooth() puts a generic line through the data
plot_dat %>%
mutate(
  # recode the stray "g" value: we assume the 'g' should be an 'f'
  sex = case_when(
    sex == "g" ~ "f",
    .default = sex # every other value is kept unchanged
  )
) %>%
ggplot() +
  aes(x = sst, y = log1p_abun) +
  geom_point(alpha = 0.5) + # semi-transparent because the points overlap
  stat_smooth(se = TRUE) + # generic smooth through the data (se = with or w/o error)
  facet_wrap(~sex) # one panel per sex, so each sex can be looked at separately
# use stat_smooth(method = "lm") to fit a straight (linear) line instead
# Packages ----------------------------------------------------------------
library(tidyverse) # attach the tidyverse, where the functions are stored
# if a package is not installed yet: run `install.packages()` first, then `library()`
# Data import -------------------------------------------------------------
read_csv() # import data in csv format
# Data checking -----------------------------------------------------------
head() # first six rows
View() # view the tibble in a new window
glimpse() # view the structure of the data
# Data manipulation -------------------------------------------------------
filter() # subset the data (e.g., order == "Aves")
select() # select a single column or multiple columns
mutate() # create a new column
rename() # rename a column
summarise(.by = ) # summarise a variable (e.g., take a mean), optionally per group
arrange() # order by a column (e.g., sort by smallest arrival time)
arrange(desc()) # ... or by largest arrival time
distinct() # give me only the unique rows (no repeats)
pull() # pull out a single column and make it a vector
count() # count the number of rows per group; name the grouping columns
# directly, e.g. count(sex) -- unlike summarise(), count() has no `.by` argument
# Data visualisation ------------------------------------------------------
ggplot() +
aes(x = column1, y = column2, colour = column3) +
geom_point() # or geom_violin() or geom_boxplot() or geom_XXXX()
Task
Download the parquet file from here: AODN_output.parquet
# the RLS data would be too big to download from online
# let's download it and then convert it to a csv
# install arrow only when it is missing, so re-running this script does not
# reinstall the package every time
if (!requireNamespace("arrow", quietly = TRUE)) {
  install.packages("arrow")
}
library(arrow)
# convert from one filetype to another
read_parquet("AODN_output.parquet") %>%
  write_csv("AODN_output.csv")
# read in the csv as you normally would
rls_raw <-
read_csv("AODN_output.csv")
# the 'datasets' package is a collection of example datasets
# https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/00Index.html
# 'datasets' ships with R itself, so there is nothing to install
library(datasets)
# E.g. (pick one -- each assignment overwrites the previous raw_data)
raw_data <- datasets::PlantGrowth
raw_data <- datasets::pressure
raw_data <- datasets::co2 # note: co2 is a time series (ts), not a data frame
# or -----------------------------------------------
# install only when missing, so re-running does not reinstall the package
if (!requireNamespace("nycflights13", quietly = TRUE)) {
  install.packages("nycflights13")
}
library(nycflights13)
raw_data <- nycflights13::flights
# or -----------------------------------------------
# install only when missing, so re-running does not reinstall the package
if (!requireNamespace("babynames", quietly = TRUE)) {
  install.packages("babynames")
}
library(babynames)
raw_data <- babynames::babynames
From raw data to cleaning, manipulating, and visualising in R