Задание 3.1
library("tidyverse")
## -- Attaching packages -------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1 v purrr 0.3.2
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ----------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Read the 2018/2019 FPL JSON file into a nested R list.
data_all <- jsonlite::read_json("C:/R/HW_03/data/fpl_data_2018_2019.json")

# Pull the "Goals" field out of every top-level entry, flatten the result to
# a vector and turn it into a two-column tibble (`name`, `goals`), with the
# goal counts converted to numeric.
data_goals <- data_all %>%
  map("Goals") %>%
  unlist() %>%
  enframe(name = "name", value = "goals") %>%
  mutate(goals = as.numeric(goals))

# Same extraction for the "Club" field, giving a tibble (`name`, `club`).
data_clubs <- data_all %>%
  map("Club") %>%
  unlist() %>%
  enframe(name = "name", value = "club")
# Combine clubs and goals into one table and keep the 30 rows with the most
# goals. The join key is spelled out so that the join neither depends on
# whichever columns the two tables happen to share nor prints a
# "Joining, by = ..." message.
data_end <- full_join(data_clubs, data_goals, by = "name") %>%
  arrange(desc(goals)) %>%
  slice(1:30)
# Horizontal bar chart of goals per `name`, ordered by goal count.
# `fill = club` colours each bar according to the club variable.
ggplot(
  data_end,
  aes(x = fct_reorder(name, goals), y = goals, fill = club)
) +
  geom_col() +
  coord_flip() +
  labs(
    x = "",
    y = "",
    title = "Number of Goals",
    caption = "data from www.premierleague.com"
  )

Задание 3.2
library(tidyverse)
library(gutenbergr)
# Look up the book in the Project Gutenberg metadata by its title; the printed
# row shows the gutenberg_id used for the download below.
filter(
  gutenberg_metadata,
  str_detect(title, "Женское международное движение: Сборник статей")
)
## # A tibble: 1 x 8
## gutenberg_id title author gutenberg_autho~ language gutenberg_books~
## <int> <chr> <chr> <int> <chr> <chr>
## 1 37196 Женс~ Vario~ 116 ru <NA>
## # ... with 2 more variables: rights <chr>, has_text <lgl>

# Download the text of the book with that id.
book <- gutenberg_download(37196)
## Determining mirror for Project Gutenberg from http://www.gutenberg.org/robot/harvest
## Using mirror http://aleph.gutenberg.org

# Stop-word list; per the column specification below it has one character
# column named `word`.
stop_rus_wors <- read_csv("C:/R/HW_03/data/stopwords-ru.csv")
## Parsed with column specification:
## cols(
## word = col_character()
## )
library(tidytext)
# Split the book into word tokens, count them (most frequent first), drop the
# stop words and keep the first 20 remaining rows.
# The anti-join key is given explicitly so the filter cannot silently change
# if the stop-word table ever gains a column that also exists in the counts
# (e.g. `n`), and so no "Joining, by = ..." message is printed.
words_end <- book %>%
  unnest_tokens(word, text) %>%
  count(word, sort = TRUE) %>%
  anti_join(stop_rus_wors, by = "word") %>%
  slice(1:20)
# Horizontal bar chart of the retained words, ordered by their count `n`.
ggplot(words_end, aes(x = fct_reorder(word, n), y = n)) +
  geom_col() +
  coord_flip() +
  labs(
    x = "",
    y = "",
    title = "'Женское международное движение: Сборник статей': частотные слова",
    caption = "source www.gutenberg.org"
  )

Задание 3.3
library(tidyverse)
library(gutenbergr)
library(tidytext)
library(udpipe)
# Look up the poem in the Project Gutenberg metadata by its title; the printed
# row shows the gutenberg_id used for the download below.
filter(
  gutenberg_metadata,
  str_detect(title, "Красавице, которая нюхала табак")
)
## # A tibble: 1 x 8
## gutenberg_id title author gutenberg_autho~ language gutenberg_books~
## <int> <chr> <chr> <int> <chr> <chr>
## 1 5316 Крас~ Pushk~ 1457 ru <NA>
## # ... with 2 more variables: rights <chr>, has_text <lgl>

# Download the text with that id.
book_1 <- gutenberg_download(5316)

# Fetch the Russian SynTagRus udpipe model.
rumodel <- udpipe_download_model(language = "russian-syntagrus")
## Downloading udpipe model from https://raw.githubusercontent.com/jwijffels/udpipe.models.ud.2.4/master/inst/udpipe-ud-2.4-190531/russian-syntagrus-ud-2.4-190531.udpipe to C:/R/HW_03/russian-syntagrus-ud-2.4-190531.udpipe
## Visit https://github.com/jwijffels/udpipe.models.ud.2.4 for model license details

# Glue all text lines into one space-separated string and annotate it with
# the downloaded model.
long_line <- str_c(book_1$text, collapse = " ")
book_data <- udpipe(long_line, object = rumodel)
# Count the part-of-speech tags in the annotated text and plot them as a bar
# chart ordered by frequency. Before counting, the tags are adjusted:
#   * tokens whose lemma is exactly "быть" are counted as VERB;
#   * tokens whose lemma is exactly "бы" are counted as PART;
#   * DET is merged into PART;
#   * every other token keeps the tag assigned by the model.
# Lemmas are compared for equality rather than with str_detect(): a substring
# test for "бы" also hits unrelated lemmas that merely contain those letters
# (e.g. "забыть", "быстрый") and would mislabel them as PART.
book_data %>%
  mutate(upos = case_when(
    lemma == "быть" ~ "VERB",
    lemma == "бы" ~ "PART",
    upos == "DET" ~ "PART",
    TRUE ~ upos
  )) %>%
  count(upos, sort = TRUE) %>%
  ggplot(aes(fct_reorder(upos, n), n)) +
  geom_col() +
  labs(
    x = "",
    y = "",
    title = "Красавице, которая нюхала табак"
  )
