# Chris Bail
# Duke University
# website: https://www.chrisbail.net
# Twitter: https://www.twitter.com/chris_bail
# github: https://github.com/cbail
library(tidytext)
library(dplyr)
# Load the remote .Rdata file into the workspace; the code below expects it
# to provide a data frame named `trumptweets`.
load(url("https://cbail.github.io/Trump_Tweets.Rdata"))

# Tidy-text form: keep only `created_at` and `text`, then split `text` into
# tokens stored in a new `word` column (one row per token).
tidy_trump_tweets <- unnest_tokens(
  select(trumptweets, created_at, text),
  output = "word",
  input = text
)
# Load the `stop_words` data set into the workspace; it is the table of
# words removed by the anti-join below.
data("stop_words")

# Word counts over all tokens in `tidy_trump_tweets`, sorted so the most
# frequent word comes first. Result columns: `word`, `n`.
top_words <- tidy_trump_tweets %>%
  # Name the join key explicitly: this avoids the "Joining with `by = ...`"
  # message and keeps the join from silently picking up any other column
  # the two tables might come to share.
  anti_join(stop_words, by = "word") %>%
  # Drop Twitter boilerplate tokens (presumably URL fragments, the retweet
  # marker and an HTML-entity remnant). `%in%` never returns NA, so missing
  # tokens are excluded explicitly to keep NA rows out of the counts.
  filter(
    !is.na(word),
    !word %in% c("https", "rt", "t.co", "amp")
  ) %>%
  # sort = TRUE orders the result by descending `n`.
  count(word, sort = TRUE)
library(ggplot2)

# Bar chart of the first 20 rows of `top_words` (which is sorted by
# descending count), with bars ordered from most to least frequent.
top_words %>%
  slice(1:20) %>%
  # reorder(word, -n) puts the largest count leftmost on the x axis.
  ggplot(aes(x = reorder(word, -n), y = n, fill = word)) +
  # geom_col() draws bars whose heights are the `n` values themselves; it is
  # the direct equivalent of geom_bar(stat = "identity").
  geom_col() +
  theme_minimal() +
  theme(
    # Slant the word labels so they do not overlap.
    axis.text.x = element_text(angle = 60, hjust = 1, size = 13),
    # Centre the title.
    plot.title = element_text(hjust = 0.5, size = 18)
  ) +
  labs(
    x = "",
    y = "Frequency",
    title = "Most Frequent Words in Trump Tweets"
  ) +
  # The fill colour only distinguishes bars, so its legend is hidden.
  # "none" replaces the deprecated `fill = FALSE`, which warns in current
  # ggplot2 releases.
  guides(fill = "none")
# tf-idf per (word, created_at) pair. Tokens are re-derived from
# `trumptweets`, stop words are removed, and each distinct `created_at`
# value is treated as one document by bind_tf_idf().
# NOTE(review): presumably every tweet has a unique `created_at`; tweets
# that share a timestamp would be pooled into one document - confirm.
tidy_trump_tfidf <- trumptweets %>%
  select(created_at, text) %>%
  unnest_tokens("word", text) %>%
  # Explicit key: avoids the implicit-join message and guards against the
  # tables accidentally sharing another column name.
  anti_join(stop_words, by = "word") %>%
  # Occurrences of each word within each document -> column `n`.
  count(word, created_at) %>%
  # Adds the `tf`, `idf` and `tf_idf` columns.
  bind_tf_idf(word, created_at, n)
# Order the tf-idf table so the highest-scoring rows come first.
top_tfidf <- arrange(tidy_trump_tfidf, desc(tf_idf))

# Print the word in the top-ranked row.
top_tfidf$word[1]
# [1] "standforouranthem"