The following visualization compares the median VADER compound sentiment scores that were calculated in, and exported from, the Portfolio-Nigeria_News_LDA_&Sentiment_Analysis.ipynb notebook. That notebook is also located in the folder “nlp-topic_modeling&_sentiment_analysis-nigeria_news-2019” in my GitHub “portfolio” repository:


library(readr)
library(tidyverse)
library(ggplot2)
library(ggthemes)

# Switch the session's working directory to the folder holding the exported
# sentiment data, then load the median VADER scores from that folder.
# NOTE(review): this absolute path is specific to one machine; the script will
# fail on any other computer unless the path is adjusted.
setwd("C:/Users/rsb84/Desktop/RB/portfolio_data/nigeria_news/")

# read_csv() resolves the relative file name against the directory set above.
vader_median <- read_csv("vader_median.csv")


# Setting the factor levels explicitly dictates the order of the faceted graphs. Otherwise, they would appear in a seemingly haphazard order.

# Convert the facet variable to a factor whose levels are listed in the order
# the facet panels should be drawn. Values that do not match one of these
# level strings exactly become NA, so the text must agree with the CSV.
# NOTE(review): the second level has an unbalanced double quote after
# `Election`; confirm it matches the value stored in vader_median.csv before
# correcting it.
vader_median[["sentences_containing"]] <- factor(
  vader_median[["sentences_containing"]],
  levels = c(
    "All Words (Baseline)",
    '"Election or "Elections"',
    '"Protest", "Protests", "Protester" or "Protesters"',
    '"Rights", "Freedom", "Liberty" or "Liberties"',
    '"Government"',
    '"Nigerian Army"',
    '"Police"',
    '"Justice System", "Judicial System", "Judiciary", "Court System", "Court" or "Courts"',
    '"Boko Haram"',
    '"Islamic State", "ISIS" or "ISIL"',
    '"Safety"',
    '"Violence"'
  )
)


# Note: Inside theme(), adding strip.text.x = element_text(size = 13, face = "bold") sets the font size and weight of the facet panel titles.


# Named character vector pairing each 2019 quarter label with a hex colour;
# it is supplied as the manual palette `values` when the chart is built.
cols <- c(
  "Quarter 1, 2019" = "#0099DF",
  "Quarter 2, 2019" = "#e7c060",
  "Quarter 3, 2019" = "#7c13c8",
  "Quarter 4, 2019" = "#1ea000"
)

# Faceted bar chart of median VADER compound sentiment scores: one panel per
# word group (`sentences_containing`) and one bar per quarter in each panel.
# Reads `vader_median` (columns quarter, vader_score, sentences_containing)
# and the named palette `cols`, both defined earlier in the script.
median_grouped_bar_chart <- ggplot(
  vader_median,
  aes(x = quarter, y = vader_score, fill = quarter)
) +
  # geom_col() draws bar heights straight from vader_score; it is the
  # idiomatic equivalent of geom_bar(stat = "identity").
  geom_col(position = "dodge") +
  # Alternative x-axis title, kept for reference:
  # scale_x_discrete(name = "All Sentences that Contain the Above Specified Words",
  #                  labels = function(x) str_wrap(x, width = 60)) +
  scale_x_discrete(
    name = "",
    labels = function(x) str_wrap(x, width = 60)
  ) +
  scale_y_continuous(
    "VADER Sentiment Compound Score",
    labels = c(-1, -0.75, -0.5, -0.25, 0.00, 0.25, 0.50),
    breaks = c(-1, -0.75, -0.5, -0.25, 0.00, 0.25, 0.50)
  ) +
  # The bars are mapped to `fill`, so the palette has to be attached to the
  # fill scale; a colour scale would leave `cols` unused. The values are
  # matched to quarters by the names of `cols`, which also serve as the legend
  # labels. The legend is kept visible because the x-axis tick labels are
  # blanked in theme() below, making it the only key to the quarters.
  scale_fill_manual(values = cols) +
  theme_wsj() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5, size = 12, color = "black"),
    title = element_text(size = 20, color = "steelblue", face = "bold"),
    axis.title.x = element_text(size = 16, vjust = -0.3),
    # strip.text.x controls the font of the facet panel titles.
    strip.text.x = element_text(size = 13, face = "bold"),
    axis.title.y = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_text(size = 14, face = "bold"),
    legend.title = element_blank(),
    legend.text = element_text(size = 14)
  ) +
  labs(title = "Median VADER Compound Sentiment Scores for Nigerian News \nSentences with the Following Words, by 2019 Quarter") +
  # Wrap long facet titles so they fit above their panels.
  facet_wrap(
    ~sentences_containing,
    labeller = label_wrap_gen(width = 34, multi_line = TRUE)
  )

# Print the chart to the active graphics device.
median_grouped_bar_chart