# Exercise 1
library(tidyverse)
library(tidytext)
library(gutenbergr)
library(igraph)
library(ggraph)
data(stop_words)
# Fetch Project Gutenberg work 2852 as a data frame of text lines.
hound <- gutenberg_download(2852)

# Tokenise the text into two-word n-grams, split each n-gram into its two
# words, discard pairs containing a stop word or a missing word, and tally
# the remaining pairs from most to least frequent.
bigram_counts <- hound %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  separate(bigram, into = c("word1", "word2"), sep = " ") %>%
  filter(
    !word1 %in% stop_words$word,
    !word2 %in% stop_words$word
  ) %>%
  drop_na(word1, word2) %>%
  count(word1, word2, sort = TRUE)

# Keep only pairs seen at least three times and turn them into a graph whose
# edges run from word1 to word2.
bigram_graph <- graph_from_data_frame(filter(bigram_counts, n >= 3))

# Draw the graph: edge opacity follows the pair count, and each edge carries
# a small closed arrowhead that stops just short of the target node.
ggraph(bigram_graph, layout = "fr") +
  geom_edge_link(
    aes(edge_alpha = n),
    arrow = grid::arrow(type = "closed", length = unit(1, "mm")),
    end_cap = circle(0.5, "mm"),
    show.legend = FALSE
  ) +
  geom_node_point(color = "lightblue", size = 0.5) +
  geom_node_text(aes(label = name), size = 1) +
  theme_void()

# Exercise 2
# Read the text of Hamlet, one element per line of the remote file.
lines <- readLines("https://chryswoods.com/text_analysis_r/hamlet.txt")

# Pair every line with its line number. seq_along() is used instead of
# 1:length(lines) so that an empty download yields an empty tibble rather
# than the sequence c(1, 0) and a column-size mismatch.
hamlet <- tibble(line = seq_along(lines), text = lines)

# Tokenise into two-word n-grams, split each into its two words, drop pairs
# with a missing word, and tally the pairs from most to least frequent.
# NOTE(review): unlike Exercise 1, no stop-word or minimum-count filter is
# applied here, so every distinct pair becomes an edge -- confirm intended.
bigram_counts <- hamlet %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  separate(bigram, into = c("word1", "word2"), sep = " ") %>%
  drop_na(word1, word2) %>%
  count(word1, word2, sort = TRUE)

# Turn the counted pairs into a graph whose edges run from word1 to word2.
bigram_graph <- bigram_counts %>%
  graph_from_data_frame()

# Draw the graph: edge opacity follows the pair count, and each edge carries
# a small closed arrowhead that stops just short of the target node.
ggraph(bigram_graph, layout = "fr") +
  geom_edge_link(
    aes(edge_alpha = n),
    arrow = grid::arrow(type = "closed", length = unit(1, "mm")),
    end_cap = circle(0.5, "mm"),
    show.legend = FALSE
  ) +
  geom_node_point(color = "lightblue", size = 0.5) +
  geom_node_text(aes(label = name), size = 1) +
  theme_void()
