Chris Bail
Duke University
website: https://www.chrisbail.net
Twitter: https://www.twitter.com/chris_bail
github: https://github.com/cbail
library(tidytext)
library(dplyr)
load(url("https://cbail.github.io/Trump_Tweets.Rdata"))
tidy_trump_tweets<- trumptweets %>%
select(created_at,text) %>%
unnest_tokens("word", text)
data("stop_words")
top_words<-
tidy_trump_tweets %>%
anti_join(stop_words) %>%
filter(!(word=="https"|
word=="rt"|
word=="t.co"|
word=="amp")) %>%
count(word) %>%
arrange(desc(n))
library(ggplot2)
top_words %>%
slice(1:20) %>%
ggplot(aes(x=reorder(word, -n), y=n, fill=word))+
geom_bar(stat="identity")+
theme_minimal()+
theme(axis.text.x =
element_text(angle = 60, hjust = 1, size=13))+
theme(plot.title =
element_text(hjust = 0.5, size=18))+
ylab("Frequency")+
xlab("")+
ggtitle("Most Frequent Words in Trump Tweets")+
guides(fill=FALSE)
tidy_trump_tfidf<- trumptweets %>%
select(created_at,text) %>%
unnest_tokens("word", text) %>%
anti_join(stop_words) %>%
count(word, created_at) %>%
bind_tf_idf(word, created_at, n)
top_tfidf<-tidy_trump_tfidf %>%
arrange(desc(tf_idf))
top_tfidf$word[1]
[1] "standforouranthem"
economic_dictionary<-c("economy","unemployment","trade","tariffs")
library(stringr)
economic_tweets <-
trumptweets %>%
filter(str_detect(text, economic_dictionary))
head(economic_tweets$text, 2)
[1] "Great talk with my friend President Mauricio Macri of Argentina this week. He is doing such a good job for Argentina. I support his vision for transforming his country’s economy and unleashing its potential!"
[2] "The Washington Post and CNN have typically written false stories about our trade negotiations with China. Nothing has happened with ZTE except as it pertains to the larger trade deal. Our country has been losing hundreds of billions of dollars a year with China..."
head(get_sentiments("bing"))
# A tibble: 6 x 2
word sentiment
<chr> <chr>
1 2-faces negative
2 abnormal negative
3 abolish negative
4 abominable negative
5 abominably negative
6 abominate negative
trump_tweet_sentiment <-
tidy_trump_tweets %>%
inner_join(get_sentiments("bing")) %>%
count(created_at, sentiment)
head(trump_tweet_sentiment)
# A tibble: 6 x 3
created_at sentiment n
<dttm> <chr> <int>
1 2017-02-05 22:49:42 positive 2
2 2017-02-06 03:36:54 positive 4
3 2017-02-06 12:01:53 negative 3
4 2017-02-06 12:01:53 positive 1
5 2017-02-06 12:07:55 negative 2
6 2017-02-06 16:32:24 negative 3
tidy_trump_tweets$date<-as.Date(
tidy_trump_tweets$created_at,
format="%Y-%m-%d %x")
trump_sentiment_plot <-
tidy_trump_tweets %>%
inner_join(get_sentiments("bing")) %>%
filter(sentiment=="negative") %>%
count(date, sentiment)
library(ggplot2)
ggplot(trump_sentiment_plot, aes(x=date, y=n))+
geom_line(color="red", size=.5)+
theme_minimal()+
theme(axis.text.x =
element_text(angle = 60, hjust = 1, size=13))+
theme(plot.title =
element_text(hjust = 0.5, size=18))+
ylab("Number of Negative Words")+
xlab("")+
ggtitle("Negative Sentiment in Trump Tweets")+
theme(aspect.ratio=1/4)
From Goncalves et al. (2013)
From Ribiero et al. (2016)
From Goncalves et al. (2013)