Associated Material
Module: Module 06 - Tidying data
Readings
- as.numeric
- janitor package to create cleaned dataframe column names
- Tidy data principles:
  - pivot_longer from the tidyr package
  - pivot_wider from the tidyr package
library(tidyverse)
# Load the raw rodent survey table from disk; the columns are tidied in the
# steps that follow.
rodents <- read_csv(file = "data/rodents_untidy.csv")
#> Rows: 41 Columns: 6
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (6): Plot location, Date collected, Family, Genus, Species, Weight
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# a) Standardise the column names to lowerCamelCase (e.g. `plotLocation`).
rodents <- janitor::clean_names(rodents, case = "small_camel")

# b) Treat the placeholder codes "-999" and "?" as missing, then store the
#    weights as numbers rather than text.
rodents <- rodents %>%
  mutate(
    weight = as.numeric(replace(weight, weight %in% c("-999", "?"), NA))
  )

# c) Split `plotLocation` at the underscore into `plot` and `location`.
rodents <- separate(
  rodents,
  plotLocation,
  into = c("plot", "location"),
  sep = "_"
)
#> Warning: Expected 2 pieces. Missing pieces filled with `NA` in 7 rows [35, 36, 37, 38,
#> 39, 40, 41].
# d) `plot` was split out of a text column, so convert it to a number.
rodents <- rodents %>% mutate(plot = as.numeric(plot))

# e) Plots 1 and 2 are kept as they are. For plot 3 the existing `genus`
#    column is discarded and `species` is split at the space into `genus` and
#    `species` (presumably the full binomial name was recorded in `species`
#    for these rows -- confirm against the raw file).
plot1_2 <- filter(rodents, plot %in% c(1, 2))

plot_3 <- rodents %>%
  filter(plot == 3) %>%
  select(!genus) %>%
  separate(species, into = c("genus", "species"), sep = " ")

# Stack the two pieces back together (plots 1-2 first, then plot 3) and print.
rodents_clean <- rbind(plot1_2, plot_3)
rodents_clean
#> # A tibble: 41 × 7
#> plot location dateCollected family genus species weight
#> <dbl> <chr> <chr> <chr> <chr> <chr> <dbl>
#> 1 1 slope 01/09/14 Heteromyidae Dipodomys merriami 40
#> 2 1 slope 01/09/14 Heteromyidae Dipodomys merriami 36
#> 3 1 slope 01/09/14 Heteromyidae Dipodomys spectabilis 135
#> 4 1 rocks 01/09/14 Heteromyidae Dipodomys merriami 39
#> 5 1 grass 01/20/14 Heteromyidae Dipodomys merriami 43
#> 6 1 rocks 01/20/14 Heteromyidae Dipodomys spectabilis 144
#> 7 1 rocks 03/13/14 Heteromyidae Dipodomys merriami 51
#> 8 1 fence 03/13/14 Heteromyidae Dipodomys merriami 44
#> 9 1 fence 03/13/14 Heteromyidae Dipodomys spectabilis 146
#> 10 2 rocks 01/08/14 Cricetidae Neotoma albigula NA
#> # ℹ 31 more rows
library(tidyverse)
# Read the wide population table. Every column other than `country` is
# treated as a year by the reshaping step below.
gapminder_yearly_pop <- read_csv(
  "data/gapminder_yearly_population_millions_total.csv"
)

# Reshape to one row per country-year. The year labels come from the column
# names, so they start out as text and are converted to numbers afterwards.
gapminder_yearly_pop_long <- gapminder_yearly_pop %>%
  pivot_longer(
    -country,
    names_to = "year",
    values_to = "population_millions"
  ) %>%
  mutate(year = as.numeric(year))
# a) New Zealand's yearly population from 1900 to 2000 (both ends included).
nz_1900_2000 <- filter(
  gapminder_yearly_pop_long,
  country == "New Zealand",
  year >= 1900,
  year <= 2000
)

# b) The same window of years for Jamaica.
jamaica_1900_2000 <- filter(
  gapminder_yearly_pop_long,
  country == "Jamaica",
  year >= 1900,
  year <= 2000
)