7. > head(talks_en)
# A tibble: 6 x 2
title_en transcript_en
<chr> <chr>
1 Fake videos of real people — and how to … Look at these images. Now…
2 How to build synthetic DNA and send it a… "Alright, let me tell you…
3 Technology that knows what you're feeling "What happens when techno…
4 How to get empowered, not overpowered, b… "After 13.8 billion years…
5 Where joy hides and how to find it "It's 2008, and I'm just …
6 ""You Found Me"" (Cello music starts) You …
9. > talks_en %>% tidytext::unnest_tokens(word, transcript_en)
# A tibble: 9,840 x 2
title_en word
<chr> <chr>
1 Fake videos of real people — and how to spot them look
2 Fake videos of real people — and how to spot them at
3 Fake videos of real people — and how to spot them these
4 Fake videos of real people — and how to spot them images
5 Fake videos of real people — and how to spot them now
6 Fake videos of real people — and how to spot them tell
7 Fake videos of real people — and how to spot them me
8 Fake videos of real people — and how to spot them which
9 Fake videos of real people — and how to spot them obama
10 Fake videos of real people — and how to spot them here
10.
11. > talks_ja %>% head()
# A tibble: 6 x 2
title_ja transcript_ja
<chr> <chr>
1 … …
2 DNA … …
3 …
4 AI AI … 138 …
5 2008 …
6 You Found Me … …
12. > talks_ja %>% tidytext::unnest_tokens(word, transcript_ja)
# A tibble: 6,266,182 x 2
title_ja word
<chr> <chr>
1
2
3
4
5
6
# ... with 6,266,172 more rows
22. > tokens_ja
# A tibble: 6,483,469 x 3
title word hinshi
<chr> <chr> <chr>
1
2
3
4
5
6
# ... with 6,483,463 more rows
23.
24. > bigram_en <- talks_en %>% select(title_en, transcript_en) %>%
+ tidytext::unnest_tokens(bigram, transcript_en, token = "ngrams", n = 2)
> head(bigram_en)
# A tibble: 6 x 2
title_en bigram
<chr> <chr>
1 ""(Nothing But) Flowers" with string quartet" music here
2 ""(Nothing But) Flowers" with string quartet" here we
3 ""(Nothing But) Flowers" with string quartet" we stand
4 ""(Nothing But) Flowers" with string quartet" stand like
5 ""(Nothing But) Flowers" with string quartet" like an
6 ""(Nothing But) Flowers" with string quartet" an adam
25.
26. > bigram_ja <- talks_ja %>%
+ as.data.frame() %>%
+ docDF(col = "transcript_ja", type=1, N = 2)
number of extracted terms = 898167
now making a data frame. wait a while!