# Exercise 1

library(tidyverse)
library(tidytext)
library(gutenbergr)
library(igraph)
library(ggraph)

# Make the `stop_words` data set available in the workspace.
data(stop_words)

# Download Project Gutenberg text 2852.
hound <- gutenberg_download(2852)

# Tokenise the text into bigrams, split each bigram into its two words,
# discard pairs where either word is a stop word or missing, and count how
# often each remaining (word1, word2) pair occurs, most frequent first.
bigram_counts <- hound %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  separate(bigram, into = c("word1", "word2"), sep = " ") %>%
  filter(
    !(word1 %in% stop_words$word | word2 %in% stop_words$word),
    !(is.na(word1) | is.na(word2))
  ) %>%
  count(word1, word2, sort = TRUE)

# Keep only pairs seen at least three times and build a graph whose edges
# run from word1 to word2 (the count `n` is carried as an edge attribute).
bigram_graph <- graph_from_data_frame(filter(bigram_counts, n >= 3))

# Draw the graph with the "fr" layout; edge opacity follows the bigram count.
ggraph(bigram_graph, layout = "fr") +
  geom_edge_link(
    aes(edge_alpha = n),
    arrow = grid::arrow(type = "closed", length = unit(1, "mm")),
    end_cap = circle(0.5, "mm"),
    show.legend = FALSE
  ) +
  geom_node_point(size = 0.5, color = "lightblue") +
  geom_node_text(aes(label = name), size = 1) +
  theme_void()

# Exercise 2

# Read the text of Hamlet, one character string per line of the file.
lines <- readLines("https://chryswoods.com/text_analysis_r/hamlet.txt")

# One row per line of text, numbered from 1. seq_along() is used instead of
# 1:length(lines) so that an empty download gives an empty index rather than
# the sequence c(1, 0).
hamlet <- tibble(line = seq_along(lines), text = lines)

# Tokenise into bigrams, split each bigram into its two words, drop pairs
# with a missing word, and count each (word1, word2) pair, most frequent
# first.
# NOTE(review): unlike Exercise 1, no stop-word filter and no minimum-count
# filter is applied here, so every distinct bigram becomes an edge in the
# graph below -- confirm this is intended.
bigram_counts <- hamlet %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  separate(bigram, into = c("word1", "word2"), sep = " ") %>%
  filter(!is.na(word1), !is.na(word2)) %>%
  count(word1, word2, sort = TRUE)

# Build a graph with one edge per counted bigram, running from word1 to
# word2 (the count `n` is carried as an edge attribute).
bigram_graph <- bigram_counts %>%
  graph_from_data_frame()

# Draw the graph with the "fr" layout; edge opacity follows the bigram count.
ggraph(bigram_graph, layout = "fr") +
  geom_edge_link(
    aes(edge_alpha = n),
    show.legend = FALSE,
    arrow = grid::arrow(type = "closed", length = unit(1, "mm")),
    end_cap = circle(0.5, "mm")
  ) +
  geom_node_point(color = "lightblue", size = 0.5) +
  geom_node_text(aes(label = name), size = 1) +
  theme_void()

# Return