First load the tidyverse, then read in all of the data and tidy it up…
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.3 ✔ purrr 0.3.4
## ✔ tibble 3.1.1 ✔ dplyr 1.0.5
## ✔ tidyr 1.1.3 ✔ stringr 1.4.0
## ✔ readr 1.4.0 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Download the whitespace-separated temperature table.
#   * the first 6 lines of the file are skipped before parsing starts
#   * the sentinel strings "-99.99" and "-99.9" are read as NA
#   * the DATE column is parsed as an integer; other column types are guessed
temperature <- read_table(
  file = "https://chryswoods.com/data_analysis_r/cetml1659on.txt",
  col_types = cols(DATE = col_integer()),
  na = c("-99.99", "-99.9"),
  skip = 6
)
# Upper-case three-letter month names in calendar order ("JAN" ... "DEC"),
# built from base R's `month.abb` constant ("Jan" ... "Dec").
month_levels <- toupper(month.abb)
# Reshape the wide table (one column per month) into long form: one row per
# year/month pair, with the reading stored in `temperature`.
historical_temperature <- temperature %>%
  # The YEAR column is not a monthly reading, so drop it before pivoting.
  select(-YEAR) %>%
  # Reuse `month_levels` as the single source of truth for the month columns
  # instead of repeating the twelve names here.
  pivot_longer(
    all_of(month_levels),
    names_to = "month",
    values_to = "temperature"
  ) %>%
  rename(year = DATE) %>%
  # Factor levels in calendar order so months sort and plot JAN..DEC rather
  # than alphabetically.
  mutate(month = factor(month, levels = month_levels))
Next, add in the “decade” and “century” variables (columns) as we will be using them later…
# Add the first year of each row's decade and century by flooring `year` to a
# multiple of 10 / 100 with integer division (e.g. 1659 -> 1650 and 1600).
# The arithmetic is done on the `year` vector inside mutate() rather than on
# one-column data frames.
historical_temperature <- historical_temperature %>%
  mutate(
    decade = (year %/% 10) * 10,
    century = (year %/% 100) * 100
  )

# Print the tibble to show the new columns.
historical_temperature
Graph that shows the maximum recorded temperature for each month…
# Highest temperature value seen for each calendar month, drawn as one line.
historical_temperature %>%
  group_by(month) %>%
  summarise(max_temp = max(temperature, na.rm = TRUE), .groups = "drop") %>%
  # `month` is a factor, so geom_line() needs an explicit grouping to connect
  # the points. `group = 1` puts every row in a single group; the previous
  # `group = "month"` was a constant string (not the column) and only worked
  # by accident.
  ggplot(aes(x = month, y = max_temp, group = 1)) +
  geom_line()
Change in average temperature in December per decade.
# Mean December temperature for each decade, with a smoothed trend line.
# Missing readings are dropped *before* grouping: a decade whose December
# values are all NA would otherwise produce mean(numeric(0)) = NaN, a row
# that ggplot2 can only discard with "Removed 1 rows ..." warnings.
historical_temperature %>%
  filter(month == "DEC", !is.na(temperature)) %>%
  group_by(decade) %>%
  summarise(average = mean(temperature), .groups = "drop") %>%
  ggplot(aes(x = decade, y = average)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Change in average temperature per century, plus a smooth trend line
# Mean temperature for each century (missing readings ignored), shown as
# points with a smoothed trend line. `span = 2.0` widens the smoothing window
# passed to geom_smooth().
historical_temperature %>%
  group_by(century) %>%
  summarise(average = mean(temperature, na.rm = TRUE), .groups = "drop") %>%
  ggplot(mapping = aes(x = century, y = average)) +
  geom_point() +
  geom_smooth(span = 2.0)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'