# Load the weekly newsfeed corpus: one article per row, columns 'week' and 'text'.
import pandas as pd
df = pd.read_csv("newsfeed.csv")
# Interactive sanity checks (interpreter output retained below).
df.shape
(24394, 2)
df.columns
Index(['week', 'text'], dtype='object')
def quarters(x):
    """Map a week number to its calendar quarter (1-4).

    Weeks 1-13 -> Q1, 14-26 -> Q2, 27-39 -> Q3, anything above 39 -> Q4.
    """
    for quarter, last_week in enumerate((13, 26, 39), start=1):
        if x <= last_week:
            return quarter
    return 4
# Tag each article with its quarter, then split the frame into one frame per quarter.
df['quarter'] = df['week'].apply(quarters)
df.columns
Index(['week', 'text', 'quarter'], dtype='object')
# .copy() so later per-quarter mutations never touch (or warn about) the parent df.
df_q1 = df.loc[(df['quarter']==1)].copy()
df_q2 = df.loc[(df['quarter']==2)].copy()
df_q3 = df.loc[(df['quarter']==3)].copy()
df_q4 = df.loc[(df['quarter']==4)].copy()
news = df['text'].copy()
import nltk
# One-time fetch of the NLTK English stopword list (no-op if already cached locally).
nltk.download('stopwords')
[nltk_data] Downloading package stopwords to [nltk_data] C:\Users\rsb84\AppData\Roaming\nltk_data... [nltk_data] Package stopwords is already up-to-date!
True
def clean_text_tokenize(text_in):
    """Normalize one raw article into a list of stemmed keyword tokens.

    Pipeline: strip bracket characters, keep letters only, lowercase,
    drop English stopwords plus domain noise words, Porter-stem the rest.

    Parameters
    ----------
    text_in : str
        Raw article text.

    Returns
    -------
    list[str]
        Stemmed, stopword-free tokens suitable for LDA.
    """
    import re
    from nltk.corpus import stopwords
    from nltk.stem import PorterStemmer
    ps = PorterStemmer()
    sw = set(stopwords.words('english'))
    # Corpus-specific words that carry no topical signal.
    new_sw = {"said", "says", "saying", "say", "us", "since", "like", "likes", "people"}
    sw = sw.union(new_sw)
    # Brackets are deleted (not replaced by a space) so "(word)text" stays fused,
    # preserving the original tokenization behavior.
    no_brackets = re.sub(r"[\([{})\]]", "", text_in)
    # BUG FIX: the original pattern '[^A-z]' also treats [ \ ] ^ _ ` as letters
    # (they sit between 'Z' and 'a' in ASCII), leaking them into tokens.
    # '[^a-zA-Z]' keeps genuine ASCII letters only.
    clean_text = re.sub('[^a-zA-Z]+', " ", no_brackets).lower().split()
    clean_text = [word for word in clean_text if word not in sw]
    return [ps.stem(word) for word in clean_text]
df['text_lda_tokenized']=df.text.apply(clean_text_tokenize)
df['text_lda_tokenized'].shape
(24394,)
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import gensim
from gensim.models import CoherenceModel, LdaModel
from gensim import corpora
import gensim.corpora as corpora
# Gensim dictionary (token -> integer id) and bag-of-words corpus for model selection.
dirichlet_dict = corpora.Dictionary(df.text_lda_tokenized)
bow_corpus = [dirichlet_dict.doc2bow(text) for text in df.text_lda_tokenized]
# Considering 1-20 topics: fit one LDA model per candidate topic count and
# record each model's top keywords per topic.
num_topics = list(range(1, 21))
num_keywords = 10
LDA_models = {}
LDA_topics = {}
for k in num_topics:
    model = LdaModel(
        corpus=bow_corpus,
        id2word=dirichlet_dict,
        num_topics=k,
        update_every=1,
        chunksize=len(bow_corpus),
        passes=15,
        alpha='auto',
        random_state=42,
    )
    LDA_models[k] = model
    raw_topics = model.show_topics(num_topics=k,
                                   num_words=num_keywords,
                                   formatted=False)
    # Keep just the keyword strings, dropping their probabilities.
    LDA_topics[k] = [[term for term, _ in topic_terms] for _, topic_terms in raw_topics]
def jaccard_similarity(topic_1, topic_2):
    """
    Return the Jaccard similarity of two topics.

    Jaccard similarity:
    - A measurement of the similarity or divergence between two sets,
      e.g. topic keyword lists: J(A,B) = |A ∩ B| / |A ∪ B|
    - 0 means the sets share nothing in common; 1 means they are identical.
    - In this analysis a LOWER score is better, as it means less topic overlap.

    Parameters
    ----------
    topic_1, topic_2 : iterable
        Collections of hashable keywords; duplicates are ignored.

    Returns
    -------
    float
        Similarity in [0.0, 1.0].
    """
    set_1 = set(topic_1)
    set_2 = set(topic_2)
    union = set_1.union(set_2)
    # Two empty topics are vacuously identical; this also avoids the
    # ZeroDivisionError the original raised on an empty union.
    if not union:
        return 1.0
    return float(len(set_1.intersection(set_2))) / float(len(union))
# For each consecutive pair of topic counts, compute the matrix of Jaccard
# similarities between every topic of model k and every topic of model k+1.
LDA_stability = {}
for idx in range(len(num_topics) - 1):
    cur, nxt = num_topics[idx], num_topics[idx + 1]
    LDA_stability[cur] = [
        [jaccard_similarity(topic_a, topic_b) for topic_b in LDA_topics[nxt]]
        for topic_a in LDA_topics[cur]
    ]
# Mean pairwise overlap per topic count (lower = more stable/distinct topics).
mean_stabilities = [np.array(LDA_stability[n]).mean() for n in num_topics[:-1]]
# c_v coherence per topic count (higher = more interpretable topics).
coherences = [
    CoherenceModel(model=LDA_models[n], texts=df.text_lda_tokenized,
                   dictionary=dirichlet_dict, coherence='c_v').get_coherence()
    for n in num_topics[:-1]
]
# Coherence minus overlap, evaluated only for the first num_keywords - 1
# topic counts — this limits topic numbers to the prespecified number of keywords.
coh_sta_diffs = [coherences[i] - mean_stabilities[i] for i in range(num_keywords - 1)]
coh_sta_max = max(coh_sta_diffs)
coh_sta_max_idxs = [i for i, d in enumerate(coh_sta_diffs) if d == coh_sta_max]
# Ties resolve to the smallest topic count.
ideal_topic_num_index = coh_sta_max_idxs[0]
ideal_topic_num = num_topics[ideal_topic_num_index]
# Plot average topic overlap vs. topic coherence across candidate topic counts,
# marking the selected ("ideal") number of topics.
plt.figure(figsize=(20,10))
ax = sns.lineplot(x=num_topics[:-1], y=mean_stabilities, label='Average Topic Overlap')
ax = sns.lineplot(x=num_topics[:-1], y=coherences, label='Topic Coherence')
# Vertical marker plus a +/-1 shaded band around the chosen topic count.
ax.axvline(x=ideal_topic_num, label='Ideal Number of Topics', color='black')
ax.axvspan(xmin=ideal_topic_num - 1, xmax=ideal_topic_num + 1, alpha=0.5, facecolor='grey')
# Headroom: 10% above the larger of the two curves' maxima.
y_max = max(max(mean_stabilities), max(coherences)) + (0.10 * max(max(mean_stabilities), max(coherences)))
ax.set_ylim([0, y_max])
ax.set_xlim([1, num_topics[-1]-1])
ax.axes.set_title('Model Metrics per Number of Topics', fontsize=25)
ax.set_ylabel('Metric Level', fontsize=20)
ax.set_xlabel('Number of Topics', fontsize=20)
plt.legend(fontsize=20)
plt.show()
import gensim
import gensim.corpora as corpora
id2word = corpora.Dictionary(df.text_lda_tokenized)  # Dictionary (token -> id) from the tokenized text
corpus = [id2word.doc2bow(text) for text in df.text_lda_tokenized]
# doc2bow encodes each document as (word id, word frequency) pairs.
# Final model: 9 topics (chosen from the coherence/stability analysis above);
# LdaMulticore parallelizes training across 5 worker processes.
n_topics = 9
ldamodel = gensim.models.ldamulticore.LdaMulticore(corpus, num_topics=n_topics, id2word=id2word, passes=15, random_state=42, workers=5)
topics = ldamodel.print_topics(num_words=10)
for topic in topics:
    print(topic)
topics = ldamodel.show_topics(num_words = 10, num_topics = 9, formatted = False)
topics
(0, '0.008*"protest" + 0.007*"govern" + 0.006*"year" + 0.005*"war" + 0.004*"forc" + 0.004*"citi" + 0.004*"month" + 0.004*"countri" + 0.004*"demonstr" + 0.004*"first"') (1, '0.009*"nigeria" + 0.008*"develop" + 0.008*"technolog" + 0.007*"nigerian" + 0.006*"nation" + 0.005*"africa" + 0.005*"year" + 0.005*"world" + 0.005*"organis" + 0.004*"new"') (2, '0.011*"presid" + 0.009*"trump" + 0.008*"nation" + 0.006*"new" + 0.006*"state" + 0.005*"unit" + 0.005*"donald" + 0.004*"u" + 0.004*"human" + 0.004*"minist"') (3, '0.029*"state" + 0.016*"attack" + 0.016*"area" + 0.014*"kill" + 0.012*"boko" + 0.012*"haram" + 0.011*"local" + 0.010*"borno" + 0.010*"armi" + 0.009*"govern"') (4, '0.025*"kill" + 0.017*"forc" + 0.017*"attack" + 0.016*"air" + 0.013*"state" + 0.012*"terrorist" + 0.011*"oper" + 0.010*"militari" + 0.010*"nigerian" + 0.009*"provinc"') (5, '0.036*"state" + 0.018*"lago" + 0.012*"road" + 0.010*"accid" + 0.009*"govern" + 0.009*"tanker" + 0.009*"build" + 0.009*"area" + 0.008*"governor" + 0.008*"explos"') (6, '0.025*"nigeria" + 0.019*"presid" + 0.013*"right" + 0.013*"buhari" + 0.012*"nation" + 0.012*"state" + 0.012*"govern" + 0.012*"countri" + 0.011*"human" + 0.010*"nigerian"') (7, '0.043*"polic" + 0.026*"state" + 0.013*"kill" + 0.012*"suspect" + 0.010*"arrest" + 0.010*"offic" + 0.009*"command" + 0.007*"two" + 0.007*"three" + 0.006*"oper"') (8, '0.012*"year" + 0.009*"presid" + 0.009*"elect" + 0.007*"countri" + 0.005*"parti" + 0.005*"one" + 0.005*"last" + 0.005*"week" + 0.004*"polit" + 0.004*"day"')
[(0, [('protest', 0.008286956), ('govern', 0.0069004465), ('year', 0.005756284), ('war', 0.0046981266), ('forc', 0.004445322), ('citi', 0.004220822), ('month', 0.0040655932), ('countri', 0.004024475), ('demonstr', 0.0038685438), ('first', 0.0037678955)]), (1, [('nigeria', 0.008630902), ('develop', 0.007887234), ('technolog', 0.0077736326), ('nigerian', 0.0071380106), ('nation', 0.0056356867), ('africa', 0.0051089874), ('year', 0.0050389897), ('world', 0.0047182785), ('organis', 0.004559472), ('new', 0.0044070138)]), (2, [('presid', 0.010542161), ('trump', 0.008725722), ('nation', 0.008114826), ('new', 0.006131224), ('state', 0.005767569), ('unit', 0.0053335303), ('donald', 0.0052019767), ('u', 0.0041046506), ('human', 0.0040863166), ('minist', 0.004029168)]), (3, [('state', 0.029206444), ('attack', 0.01602447), ('area', 0.015680756), ('kill', 0.0142720975), ('boko', 0.011603227), ('haram', 0.0115341125), ('local', 0.0108846845), ('borno', 0.010392353), ('armi', 0.009918795), ('govern', 0.009184564)]), (4, [('kill', 0.024627868), ('forc', 0.017053306), ('attack', 0.016844219), ('air', 0.015982598), ('state', 0.01269297), ('terrorist', 0.011868753), ('oper', 0.010683266), ('militari', 0.010110382), ('nigerian', 0.009779353), ('provinc', 0.009486001)]), (5, [('state', 0.03624676), ('lago', 0.018469593), ('road', 0.011771105), ('accid', 0.010084235), ('govern', 0.009417585), ('tanker', 0.009270882), ('build', 0.009130688), ('area', 0.009121263), ('governor', 0.008463639), ('explos', 0.007666969)]), (6, [('nigeria', 0.025046647), ('presid', 0.01878234), ('right', 0.0130536845), ('buhari', 0.012776169), ('nation', 0.01242993), ('state', 0.012227562), ('govern', 0.0120492615), ('countri', 0.011570601), ('human', 0.010688285), ('nigerian', 0.010428283)]), (7, [('polic', 0.043476265), ('state', 0.025971236), ('kill', 0.013217243), ('suspect', 0.011832151), ('arrest', 0.010453578), ('offic', 0.010349238), ('command', 0.008562449), ('two', 0.007486697), ('three', 
0.0066695767), ('oper', 0.006096005)]), (8, [('year', 0.0119478665), ('presid', 0.009259382), ('elect', 0.009163662), ('countri', 0.0066281706), ('parti', 0.005074749), ('one', 0.004636506), ('last', 0.004615871), ('week', 0.0045331996), ('polit', 0.0044286638), ('day', 0.0039917673)])]
# One single-column ('text') DataFrame of raw articles per quarter,
# used below for sentence-level sentiment scoring.
q1_senti = pd.DataFrame(df_q1.text.copy(), index=None)
q2_senti = pd.DataFrame(df_q2.text.copy(), index=None)
q3_senti = pd.DataFrame(df_q3.text.copy(), index=None)
q4_senti = pd.DataFrame(df_q4.text.copy(), index=None)
from nltk import PunktSentenceTokenizer
pst = PunktSentenceTokenizer()
# Abbreviations that must not be treated as sentence boundaries.  Punkt keeps
# abbreviation types in a plain set, so the data-driven loop below registers
# exactly the same 46 entries the original added one .add() call at a time —
# each spelling both with and without a trailing period, as in the original.
_ABBREVIATIONS = (
    'dr', 'sen', 'rep', 'mr', 'ms', 'mrs', 'u.s',
    'dr.', 'sen.', 'rep.', 'mr.', 'ms.', 'mrs.', 'u.s.',
    'd.c', 'd.c.', 'v.s.', 'v.s', 'ave.', 'ave', 'blvd', 'blvd.',
    'rd', 'rd.', 'st', 'st.', 'e.g.', 'e.g', 'i.e.', 'i.e',
    'etc', 'etc.', 'p.s.', 'p.s', 'u.k.', 'u.k', 'e.u', 'e.u.',
    'u.s.a.', 'u.s.a', 'a.m.', 'a.m', 'p.m.', 'p.m', 'u.a.e.', 'u.a.e',
)
for _abbrev in _ABBREVIATIONS:
    pst._params.abbrev_types.add(_abbrev)
def _split_into_sentences(texts):
    """Sentence-tokenize each article with the tuned Punkt tokenizer,
    returning one list of sentences per article."""
    return [pst.sentences_from_text(article) for article in texts]

# One row per article, one column per sentence; pandas pads shorter rows
# with None up to the longest article in that quarter.
q1_sentences = pd.DataFrame(_split_into_sentences(q1_senti.text), index=None)
q2_sentences = pd.DataFrame(_split_into_sentences(q2_senti.text), index=None)
q3_sentences = pd.DataFrame(_split_into_sentences(q3_senti.text), index=None)
q4_sentences = pd.DataFrame(_split_into_sentences(q4_senti.text), index=None)
q1_sentences.shape
(3158, 143)
q1_sentences.head(3)
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | the security situation in northern burkina fas... | nearly people have died in intercommunal viole... | this reportedly came after militants killed a ... | a state of emergency has been in place across ... | this gives security forces extra powers. | but the authorities have not deployed any new ... | there is potential for disruptive strikes to o... | the nigeria labour congress nlc the country's ... | and it has that said it is considering organis... | previous nlc strikes over the minimum wage hav... | ... | None | None | None | None | None | None | None | None | None | None |
1 | armed conflict on january at about hrs informa... | the attackers were repelled after security for... | thenumber of casualties is unknown.on january ... | the troops reportedly neutralised an unknown n... | the casualty figure during the operation is no... | the attack was reportedly repelled and theatta... | the number of casualty is unknown. | on january at about hrs according to informati... | subsequently the attack was repelled immediate... | the number of casualties is unknown. | ... | None | None | None | None | None | None | None | None | None | None |
2 | on january the un voiced alarm at the uprootin... | attacks by isis in west africa and the boko ha... | security has become a campaign issue. | None | None | None | None | None | None | None | ... | None | None | None | None | None | None | None | None | None | None |
3 rows × 143 columns
def sentiment_fun(var):
    """Return the VADER compound sentiment score (in [-1, 1]) for a string.

    The SentimentIntensityAnalyzer is built once and cached on the function
    object: the original constructed it (reloading its lexicon) on every
    call, which is needlessly slow when applied to tens of thousands of
    sentences.

    Parameters
    ----------
    var : str
        Text to score.

    Returns
    -------
    float
        The 'compound' polarity score.
    """
    #pip install vaderSentiment
    from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
    analyzer = getattr(sentiment_fun, "_analyzer", None)
    if analyzer is None:
        analyzer = sentiment_fun._analyzer = SentimentIntensityAnalyzer()
    return analyzer.polarity_scores(var)['compound']
# Score every sentence with VADER; None cells (row padding) are skipped, so each
# inner list holds only real sentence scores for that article.
q1_vader_sentences = [[sentiment_fun(sentence) for sentence in corpus if sentence is not None] for corpus in q1_sentences.values]
q1_vader_sentences_df = pd.DataFrame(q1_vader_sentences)
q2_vader_sentences = [[sentiment_fun(sentence) for sentence in corpus if sentence is not None] for corpus in q2_sentences.values]
q2_vader_sentences_df = pd.DataFrame(q2_vader_sentences)
q3_vader_sentences = [[sentiment_fun(sentence) for sentence in corpus if sentence is not None] for corpus in q3_sentences.values]
q3_vader_sentences_df = pd.DataFrame(q3_vader_sentences)
q4_vader_sentences = [[sentiment_fun(sentence) for sentence in corpus if sentence is not None] for corpus in q4_sentences.values]
q4_vader_sentences_df = pd.DataFrame(q4_vader_sentences)
q1_vader_sentences_df.shape
(3158, 143)
q2_vader_sentences_df.shape
(7269, 143)
q3_vader_sentences_df.shape
(7512, 141)
q4_vader_sentences_df.shape
(6455, 108)
q1_vader_sentences_df.replace(0.0000, np.NaN, inplace=True)
q1_vader_sentences_df.head(3)
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -0.2263 | -0.8271 | -0.8126 | -0.3818 | 0.3400 | -0.0387 | -0.5859 | -0.2263 | -0.3612 | -0.6124 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | 0.2960 | 0.1027 | 0.3400 | NaN | 0.1779 | -0.4767 | -0.4767 | 0.5423 | -0.1531 | 0.0772 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 | -0.8612 | -0.2023 | 0.3400 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 rows × 143 columns
# Same zero-to-missing replacement for the remaining quarters.
# (np.nan replaces np.NaN, an alias removed in NumPy 2.0.)
q2_vader_sentences_df.replace(0.0000, np.nan, inplace=True)
q3_vader_sentences_df.replace(0.0000, np.nan, inplace=True)
q4_vader_sentences_df.replace(0.0000, np.nan, inplace=True)
# Per-article median sentiment (median across that article's sentence scores).
q1_sentences_vader_medians = q1_vader_sentences_df.median(axis=1, skipna=True)
q2_sentences_vader_medians = q2_vader_sentences_df.median(axis=1, skipna=True)
q3_sentences_vader_medians = q3_vader_sentences_df.median(axis=1, skipna=True)
q4_sentences_vader_medians = q4_vader_sentences_df.median(axis=1, skipna=True)
# Quarter-level sentiment: the median of the per-article medians.
q1_overall_median_vader_score = q1_sentences_vader_medians.median(axis=0, skipna=True)
print(q1_overall_median_vader_score)
-0.3818
q2_overall_median_vader_score = q2_sentences_vader_medians.median(axis=0, skipna=True)
print(q2_overall_median_vader_score)
-0.3818
q3_overall_median_vader_score = q3_sentences_vader_medians.median(axis=0, skipna=True)
print(q3_overall_median_vader_score)
-0.36569999999999997
q4_overall_median_vader_score = q4_sentences_vader_medians.median(axis=0, skipna=True)
print(q4_overall_median_vader_score)
-0.31875
from itertools import chain
# Flatten each quarter's article-by-sentence grid into one flat list of cells
# (the None padding cells are filtered out just below).
q1_one_sentence_per_row = list(chain.from_iterable(q1_sentences.values))
q2_one_sentence_per_row = list(chain.from_iterable(q2_sentences.values))
q3_one_sentence_per_row = list(chain.from_iterable(q3_sentences.values))
q4_one_sentence_per_row = list(chain.from_iterable(q4_sentences.values))
len(q1_one_sentence_per_row)
451594
# filter(None, ...) drops the None padding, keeping real sentences only.
q1_one_sentence_per_row = list(filter(None, q1_one_sentence_per_row))
len(q1_one_sentence_per_row)
8139
q1_one_sentence_per_row = pd.DataFrame(q1_one_sentence_per_row)
len(q2_one_sentence_per_row)
1039467
q2_one_sentence_per_row = list(filter(None, q2_one_sentence_per_row))
len(q2_one_sentence_per_row)
17137
q2_one_sentence_per_row = pd.DataFrame(q2_one_sentence_per_row)
len(q3_one_sentence_per_row)
1059192
q3_one_sentence_per_row = list(filter(None, q3_one_sentence_per_row))
len(q3_one_sentence_per_row)
18121
q3_one_sentence_per_row = pd.DataFrame(q3_one_sentence_per_row)
len(q4_one_sentence_per_row)
697140
q4_one_sentence_per_row = list(filter(None, q4_one_sentence_per_row))
len(q4_one_sentence_per_row)
16141
q4_one_sentence_per_row = pd.DataFrame(q4_one_sentence_per_row)
# NOTE(review): this import is unused here — sentiment_fun performs its own local import.
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# Score each sentence, then rename the lone text column from 0 to 'text'.
q1_one_sentence_per_row['vader'] = q1_one_sentence_per_row[0].apply(sentiment_fun)
q1_one_sentence_per_row.rename(columns = {0:'text'}, inplace = True)
q2_one_sentence_per_row['vader'] = q2_one_sentence_per_row[0].apply(sentiment_fun)
q2_one_sentence_per_row.rename(columns = {0:'text'}, inplace = True)
q3_one_sentence_per_row['vader'] = q3_one_sentence_per_row[0].apply(sentiment_fun)
q3_one_sentence_per_row.rename(columns = {0:'text'}, inplace = True)
q4_one_sentence_per_row['vader'] = q4_one_sentence_per_row[0].apply(sentiment_fun)
q4_one_sentence_per_row.rename(columns = {0:'text'}, inplace = True)
q1_one_sentence_per_row_vader_no_zeros = q1_one_sentence_per_row.vader[q1_one_sentence_per_row['vader'] != float(0.000000)]
q1_one_sentence_per_row_vader_no_zeros.describe()
count 5825.000000 mean -0.245656 std 0.528684 min -0.980800 25% -0.690800 50% -0.401900 75% 0.250000 max 0.960100 Name: vader, dtype: float64
q1_one_sentence_per_row_vader_no_zeros.median(axis=0)
-0.4019
q2_one_sentence_per_row_vader_no_zeros = q2_one_sentence_per_row.vader[q2_one_sentence_per_row['vader'] != float(0.000000)]
q2_one_sentence_per_row_vader_no_zeros.describe()
count 11787.000000 mean -0.261280 std 0.529099 min -0.987300 25% -0.700300 50% -0.421500 75% 0.238200 max 0.963300 Name: vader, dtype: float64
q2_one_sentence_per_row_vader_no_zeros.median(axis=0)
-0.4215
q3_one_sentence_per_row_vader_no_zeros = q3_one_sentence_per_row.vader[q3_one_sentence_per_row['vader'] != float(0.000000)]
q3_one_sentence_per_row_vader_no_zeros.describe()
count 12421.000000 mean -0.256102 std 0.524448 min -0.989300 25% -0.695600 50% -0.421500 75% 0.238200 max 0.971600 Name: vader, dtype: float64
q3_one_sentence_per_row_vader_no_zeros.median(axis=0)
-0.4215
q4_one_sentence_per_row_vader_no_zeros = q4_one_sentence_per_row.vader[q4_one_sentence_per_row['vader'] != float(0.000000)]
q4_one_sentence_per_row_vader_no_zeros.describe()
count 10736.000000 mean -0.233173 std 0.537499 min -0.980800 25% -0.680800 50% -0.401900 75% 0.296000 max 0.966100 Name: vader, dtype: float64
q4_one_sentence_per_row_vader_no_zeros.median(axis=0)
-0.4019
# "elections" contains "election", so a single contains() test covers both
# (the second test in the original OR was redundant and cost an extra pass).
q1_one_sentence_per_row_elections = pd.DataFrame(q1_one_sentence_per_row[q1_one_sentence_per_row.text.str.contains("election")])
# Drop exact-zero (neutral/unscored) compound scores before summarizing.
q1_one_sentence_per_row_elections_vader_no_zeros = q1_one_sentence_per_row_elections.vader[q1_one_sentence_per_row_elections['vader'] != 0.0]
q1_one_sentence_per_row_elections_vader_no_zeros.describe()
count 355.000000 mean -0.085052 std 0.545170 min -0.933700 25% -0.602400 50% -0.161100 75% 0.440400 max 0.902300 Name: vader, dtype: float64
q1_one_sentence_per_row_elections_vader_no_zeros.median(axis=0)
-0.1611
# "elections" contains "election", so one contains() test covers both forms.
q2_one_sentence_per_row_elections = pd.DataFrame(q2_one_sentence_per_row[q2_one_sentence_per_row.text.str.contains("election")])
# Drop exact-zero (neutral/unscored) compound scores before summarizing.
q2_one_sentence_per_row_elections_vader_no_zeros = q2_one_sentence_per_row_elections.vader[q2_one_sentence_per_row_elections['vader'] != 0.0]
q2_one_sentence_per_row_elections_vader_no_zeros.describe()
count 163.000000 mean 0.045700 std 0.554379 min -0.948500 25% -0.449600 50% 0.077200 75% 0.571900 max 0.886000 Name: vader, dtype: float64
q2_one_sentence_per_row_elections_vader_no_zeros.median(axis=0)
0.0772
# "elections" contains "election", so one contains() test covers both forms.
q3_one_sentence_per_row_elections = pd.DataFrame(q3_one_sentence_per_row[q3_one_sentence_per_row.text.str.contains("election")])
# Drop exact-zero (neutral/unscored) compound scores before summarizing.
q3_one_sentence_per_row_elections_vader_no_zeros = q3_one_sentence_per_row_elections.vader[q3_one_sentence_per_row_elections['vader'] != 0.0]
q3_one_sentence_per_row_elections_vader_no_zeros.describe()
count 158.000000 mean -0.099552 std 0.508252 min -0.932500 25% -0.568275 50% -0.153100 75% 0.401900 max 0.812600 Name: vader, dtype: float64
q3_one_sentence_per_row_elections_vader_no_zeros.median(axis=0)
-0.1531
# "elections" contains "election", so one contains() test covers both forms.
q4_one_sentence_per_row_elections = pd.DataFrame(q4_one_sentence_per_row[q4_one_sentence_per_row.text.str.contains("election")])
# Drop exact-zero (neutral/unscored) compound scores before summarizing.
q4_one_sentence_per_row_elections_vader_no_zeros = q4_one_sentence_per_row_elections.vader[q4_one_sentence_per_row_elections['vader'] != 0.0]
q4_one_sentence_per_row_elections_vader_no_zeros.describe()
count 122.000000 mean -0.020583 std 0.564596 min -0.941300 25% -0.522675 50% -0.054400 75% 0.476700 max 0.900100 Name: vader, dtype: float64
q4_one_sentence_per_row_elections_vader_no_zeros.median(axis=0)
-0.054400000000000004
# "protest" is a prefix of protests/protester/protesters, so this single
# substring test reproduces the original four ORed contains() checks in one pass.
q1_one_sentence_per_row_protests = pd.DataFrame(q1_one_sentence_per_row[q1_one_sentence_per_row.text.str.contains("protest")])
# Drop exact-zero (neutral/unscored) compound scores before summarizing.
q1_one_sentence_per_row_protests_vader_no_zeros = q1_one_sentence_per_row_protests.vader[q1_one_sentence_per_row_protests['vader'] != 0.0]
q1_one_sentence_per_row_protests_vader_no_zeros.describe()
count 183.000000 mean -0.305215 std 0.413701 min -0.945100 25% -0.648600 50% -0.294400 75% -0.128000 max 0.893400 Name: vader, dtype: float64
q1_one_sentence_per_row_protests_vader_no_zeros.median(axis=0)
-0.2944
# "protest" is a prefix of all four variants the original ORed together.
q2_one_sentence_per_row_protests = pd.DataFrame(q2_one_sentence_per_row[q2_one_sentence_per_row.text.str.contains("protest")])
# Drop exact-zero (neutral/unscored) compound scores before summarizing.
q2_one_sentence_per_row_protests_vader_no_zeros = q2_one_sentence_per_row_protests.vader[q2_one_sentence_per_row_protests['vader'] != 0.0]
q2_one_sentence_per_row_protests_vader_no_zeros.describe()
count 259.000000 mean -0.361518 std 0.404511 min -0.972300 25% -0.695550 50% -0.421500 75% -0.214300 max 0.877900 Name: vader, dtype: float64
q2_one_sentence_per_row_protests_vader_no_zeros.median(axis=0)
-0.4215
# "protest" is a prefix of all four variants the original ORed together.
q3_one_sentence_per_row_protests = pd.DataFrame(q3_one_sentence_per_row[q3_one_sentence_per_row.text.str.contains("protest")])
# Drop exact-zero (neutral/unscored) compound scores before summarizing.
q3_one_sentence_per_row_protests_vader_no_zeros = q3_one_sentence_per_row_protests.vader[q3_one_sentence_per_row_protests['vader'] != 0.0]
q3_one_sentence_per_row_protests_vader_no_zeros.describe()
count 543.000000 mean -0.461390 std 0.382368 min -0.957800 25% -0.775000 50% -0.542300 75% -0.226300 max 0.851100 Name: vader, dtype: float64
q3_one_sentence_per_row_protests_vader_no_zeros.median(axis=0)
-0.5423
# "protest" is a prefix of all four variants the original ORed together.
q4_one_sentence_per_row_protests = pd.DataFrame(q4_one_sentence_per_row[q4_one_sentence_per_row.text.str.contains("protest")])
# Drop exact-zero (neutral/unscored) compound scores before summarizing.
q4_one_sentence_per_row_protests_vader_no_zeros = q4_one_sentence_per_row_protests.vader[q4_one_sentence_per_row_protests['vader'] != 0.0]
q4_one_sentence_per_row_protests_vader_no_zeros.describe()
count 299.000000 mean -0.441024 std 0.373076 min -0.945100 25% -0.734700 50% -0.526700 75% -0.226300 max 0.800500 Name: vader, dtype: float64
q4_one_sentence_per_row_protests_vader_no_zeros.median(axis=0)
-0.5267
# str.contains() treats its pattern as a regex by default, so one alternation
# replaces the four ORed boolean Series of the original in a single pass.
# ("liberty" is NOT a substring of "liberties", so both spellings stay listed.)
q1_one_sentence_per_row_rights = pd.DataFrame(q1_one_sentence_per_row[q1_one_sentence_per_row.text.str.contains("rights|freedom|liberty|liberties")])
# Drop exact-zero (neutral/unscored) compound scores before summarizing.
q1_one_sentence_per_row_rights_vader_no_zeros = q1_one_sentence_per_row_rights.vader[q1_one_sentence_per_row_rights['vader'] != 0.0]
q1_one_sentence_per_row_rights_vader_no_zeros.describe()
count 135.000000 mean -0.160793 std 0.594134 min -0.980800 25% -0.700200 50% -0.296000 75% 0.371500 max 0.944200 Name: vader, dtype: float64
q1_one_sentence_per_row_rights_vader_no_zeros.median(axis=0)
-0.296
# Single regex alternation replaces the original's four ORed contains() Series.
q2_one_sentence_per_row_rights = pd.DataFrame(q2_one_sentence_per_row[q2_one_sentence_per_row.text.str.contains("rights|freedom|liberty|liberties")])
# Drop exact-zero (neutral/unscored) compound scores before summarizing.
q2_one_sentence_per_row_rights_vader_no_zeros = q2_one_sentence_per_row_rights.vader[q2_one_sentence_per_row_rights['vader'] != 0.0]
q2_one_sentence_per_row_rights_vader_no_zeros.describe()
count 323.000000 mean -0.084218 std 0.588692 min -0.971600 25% -0.624900 50% -0.177900 75% 0.485300 max 0.954500 Name: vader, dtype: float64
q2_one_sentence_per_row_rights_vader_no_zeros.median(axis=0)
-0.1779
# Single regex alternation replaces the original's four ORed contains() Series.
q3_one_sentence_per_row_rights = pd.DataFrame(q3_one_sentence_per_row[q3_one_sentence_per_row.text.str.contains("rights|freedom|liberty|liberties")])
# Drop exact-zero (neutral/unscored) compound scores before summarizing.
q3_one_sentence_per_row_rights_vader_no_zeros = q3_one_sentence_per_row_rights.vader[q3_one_sentence_per_row_rights['vader'] != 0.0]
q3_one_sentence_per_row_rights_vader_no_zeros.describe()
count 379.000000 mean -0.113163 std 0.557515 min -0.938200 25% -0.618650 50% -0.273200 75% 0.421500 max 0.911800 Name: vader, dtype: float64
q3_one_sentence_per_row_rights_vader_no_zeros.median(axis=0)
-0.2732
# Single regex alternation replaces the original's four ORed contains() Series.
q4_one_sentence_per_row_rights = pd.DataFrame(q4_one_sentence_per_row[q4_one_sentence_per_row.text.str.contains("rights|freedom|liberty|liberties")])
# Drop exact-zero (neutral/unscored) compound scores before summarizing.
q4_one_sentence_per_row_rights_vader_no_zeros = q4_one_sentence_per_row_rights.vader[q4_one_sentence_per_row_rights['vader'] != 0.0]
q4_one_sentence_per_row_rights_vader_no_zeros.describe()
count 426.000000 mean -0.004027 std 0.572954 min -0.953800 25% -0.510600 50% -0.064400 75% 0.526700 max 0.950900 Name: vader, dtype: float64
q4_one_sentence_per_row_rights_vader_no_zeros.median(axis=0)
-0.0644
# Sentences mentioning "government" (substring match also catches "governments").
q1_one_sentence_per_row_government = pd.DataFrame(q1_one_sentence_per_row[q1_one_sentence_per_row.text.str.contains("government")])
# Keep only nonzero (scored, non-neutral) compound scores.
q1_one_sentence_per_row_government_vader_no_zeros = q1_one_sentence_per_row_government.vader[q1_one_sentence_per_row_government['vader'] != float(0.000000)]
q1_one_sentence_per_row_government_vader_no_zeros.describe()
count 568.000000 mean -0.354443 std 0.501744 min -0.972300 25% -0.735100 50% -0.526700 75% -0.025800 max 0.954500 Name: vader, dtype: float64
q1_one_sentence_per_row_government_vader_no_zeros.median(axis=0)
-0.5267
# Q2: sentences mentioning "government", nonzero scores only.
q2_one_sentence_per_row_government = pd.DataFrame(q2_one_sentence_per_row[q2_one_sentence_per_row.text.str.contains("government")])
q2_one_sentence_per_row_government_vader_no_zeros = q2_one_sentence_per_row_government.vader[q2_one_sentence_per_row_government['vader'] != float(0.000000)]
q2_one_sentence_per_row_government_vader_no_zeros.describe()
count 1205.000000 mean -0.330578 std 0.520185 min -0.972300 25% -0.771700 50% -0.493900 75% 0.077200 max 0.950900 Name: vader, dtype: float64
q2_one_sentence_per_row_government_vader_no_zeros.median(axis=0)
-0.4939
# Q3: sentences mentioning "government", nonzero scores only.
q3_one_sentence_per_row_government = pd.DataFrame(q3_one_sentence_per_row[q3_one_sentence_per_row.text.str.contains("government")])
q3_one_sentence_per_row_government_vader_no_zeros = q3_one_sentence_per_row_government.vader[q3_one_sentence_per_row_government['vader'] != float(0.000000)]
q3_one_sentence_per_row_government_vader_no_zeros.describe()
count 1332.000000 mean -0.287866 std 0.525666 min -0.989300 25% -0.735100 50% -0.458800 75% 0.153100 max 0.932500 Name: vader, dtype: float64
q3_one_sentence_per_row_government_vader_no_zeros.median(axis=0)
-0.4588
# Q4: sentences mentioning "government", nonzero scores only.
q4_one_sentence_per_row_government = pd.DataFrame(q4_one_sentence_per_row[q4_one_sentence_per_row.text.str.contains("government")])
q4_one_sentence_per_row_government_vader_no_zeros = q4_one_sentence_per_row_government.vader[q4_one_sentence_per_row_government['vader'] != float(0.000000)]
q4_one_sentence_per_row_government_vader_no_zeros.describe()
count 1003.000000 mean -0.260251 std 0.546112 min -0.964300 25% -0.726900 50% -0.440400 75% 0.273200 max 0.921700 Name: vader, dtype: float64
q4_one_sentence_per_row_government_vader_no_zeros.median(axis=0)
-0.4404
# Sentences mentioning "nigerian army" — match is case-sensitive; the corpus
# text appears lowercased (see earlier output), presumably making this safe.
q1_one_sentence_per_row_army = pd.DataFrame(q1_one_sentence_per_row[q1_one_sentence_per_row.text.str.contains("nigerian army")])
# Keep only nonzero (scored, non-neutral) compound scores.
q1_one_sentence_per_row_army_vader_no_zeros = q1_one_sentence_per_row_army.vader[q1_one_sentence_per_row_army['vader'] != float(0.000000)]
q1_one_sentence_per_row_army_vader_no_zeros.describe()
count 48.000000 mean -0.331706 std 0.551908 min -0.934900 25% -0.788875 50% -0.485300 75% 0.063825 max 0.822100 Name: vader, dtype: float64
q1_one_sentence_per_row_army_vader_no_zeros.median(axis=0)
-0.4853
# Q2: sentences mentioning "nigerian army", nonzero scores only.
q2_one_sentence_per_row_army = pd.DataFrame(q2_one_sentence_per_row[q2_one_sentence_per_row.text.str.contains("nigerian army")])
q2_one_sentence_per_row_army_vader_no_zeros = q2_one_sentence_per_row_army.vader[q2_one_sentence_per_row_army['vader'] != float(0.000000)]
q2_one_sentence_per_row_army_vader_no_zeros.describe()
count 201.000000 mean -0.590305 std 0.418051 min -0.962800 25% -0.862500 50% -0.778300 75% -0.476700 max 0.875000 Name: vader, dtype: float64
q2_one_sentence_per_row_army_vader_no_zeros.median(axis=0)
-0.7783
# Q3: sentences mentioning "nigerian army", nonzero scores only.
q3_one_sentence_per_row_army = pd.DataFrame(q3_one_sentence_per_row[q3_one_sentence_per_row.text.str.contains("nigerian army")])
q3_one_sentence_per_row_army_vader_no_zeros = q3_one_sentence_per_row_army.vader[q3_one_sentence_per_row_army['vader'] != float(0.000000)]
q3_one_sentence_per_row_army_vader_no_zeros.describe()
count 270.000000 mean -0.422124 std 0.501534 min -0.971600 25% -0.840200 50% -0.636750 75% -0.128000 max 0.807400 Name: vader, dtype: float64
q3_one_sentence_per_row_army_vader_no_zeros.median(axis=0)
-0.6367499999999999
# Q4: sentences mentioning "nigerian army", nonzero scores only.
q4_one_sentence_per_row_army = pd.DataFrame(q4_one_sentence_per_row[q4_one_sentence_per_row.text.str.contains("nigerian army")])
q4_one_sentence_per_row_army_vader_no_zeros = q4_one_sentence_per_row_army.vader[q4_one_sentence_per_row_army['vader'] != float(0.000000)]
q4_one_sentence_per_row_army_vader_no_zeros.describe()
count 180.000000 mean -0.266293 std 0.575530 min -0.960100 25% -0.771700 50% -0.421500 75% 0.361200 max 0.857300 Name: vader, dtype: float64
# Median nonzero Q4 army score; axis=0 is the Series default, so it is omitted.
q4_one_sentence_per_row_army_vader_no_zeros.median()
-0.4215
# Q1 sentences mentioning the police; VADER score 0 = no sentiment detected,
# so exact zeros are excluded before summarizing.
q1_one_sentence_per_row_police = q1_one_sentence_per_row[q1_one_sentence_per_row.text.str.contains("police")].copy()
q1_one_sentence_per_row_police_vader_no_zeros = q1_one_sentence_per_row_police.vader[q1_one_sentence_per_row_police['vader'] != 0]
q1_one_sentence_per_row_police_vader_no_zeros.describe()
count 283.000000 mean -0.515807 std 0.412829 min -0.953800 25% -0.815100 50% -0.670500 75% -0.389200 max 0.831600 Name: vader, dtype: float64
# Median nonzero Q1 police score; axis=0 is the Series default, so it is omitted.
q1_one_sentence_per_row_police_vader_no_zeros.median()
-0.6705
# Q2 sentences mentioning the police; VADER score 0 = no sentiment detected,
# so exact zeros are excluded before summarizing.
q2_one_sentence_per_row_police = q2_one_sentence_per_row[q2_one_sentence_per_row.text.str.contains("police")].copy()
q2_one_sentence_per_row_police_vader_no_zeros = q2_one_sentence_per_row_police.vader[q2_one_sentence_per_row_police['vader'] != 0]
q2_one_sentence_per_row_police_vader_no_zeros.describe()
count 762.000000 mean -0.475327 std 0.436995 min -0.978100 25% -0.807300 50% -0.624900 75% -0.300750 max 0.844200 Name: vader, dtype: float64
# Median nonzero Q2 police score; axis=0 is the Series default, so it is omitted.
q2_one_sentence_per_row_police_vader_no_zeros.median()
-0.6249
# Q3 sentences mentioning the police; VADER score 0 = no sentiment detected,
# so exact zeros are excluded before summarizing.
q3_one_sentence_per_row_police = q3_one_sentence_per_row[q3_one_sentence_per_row.text.str.contains("police")].copy()
q3_one_sentence_per_row_police_vader_no_zeros = q3_one_sentence_per_row_police.vader[q3_one_sentence_per_row_police['vader'] != 0]
q3_one_sentence_per_row_police_vader_no_zeros.describe()
count 1158.000000 mean -0.470303 std 0.440810 min -0.969800 25% -0.790600 50% -0.648600 75% -0.296000 max 0.884700 Name: vader, dtype: float64
# Median nonzero Q3 police score; axis=0 is the Series default, so it is omitted.
q3_one_sentence_per_row_police_vader_no_zeros.median()
-0.6486
# Q4 sentences mentioning the police; VADER score 0 = no sentiment detected,
# so exact zeros are excluded before summarizing.
q4_one_sentence_per_row_police = q4_one_sentence_per_row[q4_one_sentence_per_row.text.str.contains("police")].copy()
q4_one_sentence_per_row_police_vader_no_zeros = q4_one_sentence_per_row_police.vader[q4_one_sentence_per_row_police['vader'] != 0]
q4_one_sentence_per_row_police_vader_no_zeros.describe()
count 680.000000 mean -0.464350 std 0.437471 min -0.976900 25% -0.790600 50% -0.605900 75% -0.337275 max 0.889800 Name: vader, dtype: float64
# Median nonzero Q4 police score; axis=0 is the Series default, so it is omitted.
q4_one_sentence_per_row_police_vader_no_zeros.median()
-0.6059000000000001
# Q1 judiciary-related sentences.  One regex replaces the six chained
# str.contains calls; the substring "court" already subsumes "courts" and
# "court system".  VADER score 0 = no sentiment detected, so zeros are dropped.
q1_one_sentence_per_row_courts = q1_one_sentence_per_row[q1_one_sentence_per_row.text.str.contains(r"justice system|judicial system|judiciary|court")].copy()
q1_one_sentence_per_row_courts_vader_no_zeros = q1_one_sentence_per_row_courts.vader[q1_one_sentence_per_row_courts['vader'] != 0]
q1_one_sentence_per_row_courts_vader_no_zeros.describe()
count 142.000000 mean -0.223411 std 0.529118 min -0.946000 25% -0.719875 50% -0.284600 75% 0.226300 max 0.946000 Name: vader, dtype: float64
# Median nonzero Q1 courts score; axis=0 is the Series default, so it is omitted.
q1_one_sentence_per_row_courts_vader_no_zeros.median()
-0.28459999999999996
# Q2 judiciary-related sentences.  One regex replaces the six chained
# str.contains calls ("court" subsumes "courts"/"court system"); VADER
# score 0 = no sentiment detected, so zeros are dropped.
q2_one_sentence_per_row_courts = q2_one_sentence_per_row[q2_one_sentence_per_row.text.str.contains(r"justice system|judicial system|judiciary|court")].copy()
q2_one_sentence_per_row_courts_vader_no_zeros = q2_one_sentence_per_row_courts.vader[q2_one_sentence_per_row_courts['vader'] != 0]
q2_one_sentence_per_row_courts_vader_no_zeros.describe()
count 207.000000 mean -0.180316 std 0.532890 min -0.944000 25% -0.648600 50% -0.361200 75% 0.329100 max 0.902200 Name: vader, dtype: float64
# Median nonzero Q2 courts score; axis=0 is the Series default, so it is omitted.
q2_one_sentence_per_row_courts_vader_no_zeros.median()
-0.3612
# Q3 judiciary-related sentences.  One regex replaces the six chained
# str.contains calls ("court" subsumes "courts"/"court system"); VADER
# score 0 = no sentiment detected, so zeros are dropped.
q3_one_sentence_per_row_courts = q3_one_sentence_per_row[q3_one_sentence_per_row.text.str.contains(r"justice system|judicial system|judiciary|court")].copy()
q3_one_sentence_per_row_courts_vader_no_zeros = q3_one_sentence_per_row_courts.vader[q3_one_sentence_per_row_courts['vader'] != 0]
q3_one_sentence_per_row_courts_vader_no_zeros.describe()
count 271.000000 mean -0.157954 std 0.576899 min -0.967000 25% -0.690800 50% -0.296000 75% 0.371500 max 0.888500 Name: vader, dtype: float64
# Median nonzero Q3 courts score; axis=0 is the Series default, so it is omitted.
q3_one_sentence_per_row_courts_vader_no_zeros.median()
-0.296
# Q4 judiciary-related sentences.  One regex replaces the six chained
# str.contains calls ("court" subsumes "courts"/"court system"); VADER
# score 0 = no sentiment detected, so zeros are dropped.
q4_one_sentence_per_row_courts = q4_one_sentence_per_row[q4_one_sentence_per_row.text.str.contains(r"justice system|judicial system|judiciary|court")].copy()
q4_one_sentence_per_row_courts_vader_no_zeros = q4_one_sentence_per_row_courts.vader[q4_one_sentence_per_row_courts['vader'] != 0]
q4_one_sentence_per_row_courts_vader_no_zeros.describe()
count 291.000000 mean -0.123811 std 0.577444 min -0.980800 25% -0.659700 50% -0.250000 75% 0.421500 max 0.906200 Name: vader, dtype: float64
# Median nonzero Q4 courts score; axis=0 is the Series default, so it is omitted.
q4_one_sentence_per_row_courts_vader_no_zeros.median()
-0.25
# Q1 sentences mentioning Boko Haram; VADER score 0 = no sentiment detected,
# so exact zeros are excluded before summarizing.
q1_one_sentence_per_row_boko_haram = q1_one_sentence_per_row[q1_one_sentence_per_row.text.str.contains("boko haram")].copy()
q1_one_sentence_per_row_boko_haram_vader_no_zeros = q1_one_sentence_per_row_boko_haram.vader[q1_one_sentence_per_row_boko_haram['vader'] != 0]
q1_one_sentence_per_row_boko_haram_vader_no_zeros.describe()
count 288.000000 mean -0.508719 std 0.425532 min -0.968200 25% -0.822500 50% -0.670500 75% -0.361200 max 0.836600 Name: vader, dtype: float64
# Median nonzero Q1 Boko Haram score; axis=0 is the Series default, so it is omitted.
q1_one_sentence_per_row_boko_haram_vader_no_zeros.median()
-0.6705
# Q2 sentences mentioning Boko Haram; VADER score 0 = no sentiment detected,
# so exact zeros are excluded before summarizing.
q2_one_sentence_per_row_boko_haram = q2_one_sentence_per_row[q2_one_sentence_per_row.text.str.contains("boko haram")].copy()
q2_one_sentence_per_row_boko_haram_vader_no_zeros = q2_one_sentence_per_row_boko_haram.vader[q2_one_sentence_per_row_boko_haram['vader'] != 0]
q2_one_sentence_per_row_boko_haram_vader_no_zeros.describe()
count 644.000000 mean -0.531293 std 0.441749 min -0.964800 25% -0.862500 50% -0.680800 75% -0.381800 max 0.963300 Name: vader, dtype: float64
# Median nonzero Q2 Boko Haram score; axis=0 is the Series default, so it is omitted.
q2_one_sentence_per_row_boko_haram_vader_no_zeros.median()
-0.6808
# Q3 sentences mentioning Boko Haram; VADER score 0 = no sentiment detected,
# so exact zeros are excluded before summarizing.
q3_one_sentence_per_row_boko_haram = q3_one_sentence_per_row[q3_one_sentence_per_row.text.str.contains("boko haram")].copy()
q3_one_sentence_per_row_boko_haram_vader_no_zeros = q3_one_sentence_per_row_boko_haram.vader[q3_one_sentence_per_row_boko_haram['vader'] != 0]
q3_one_sentence_per_row_boko_haram_vader_no_zeros.describe()
count 703.000000 mean -0.533254 std 0.400835 min -0.971600 25% -0.859100 50% -0.624900 75% -0.381800 max 0.844200 Name: vader, dtype: float64
# Median nonzero Q3 Boko Haram score; axis=0 is the Series default, so it is omitted.
q3_one_sentence_per_row_boko_haram_vader_no_zeros.median()
-0.6249
# Q4 sentences mentioning Boko Haram; VADER score 0 = no sentiment detected,
# so exact zeros are excluded before summarizing.
q4_one_sentence_per_row_boko_haram = q4_one_sentence_per_row[q4_one_sentence_per_row.text.str.contains("boko haram")].copy()
q4_one_sentence_per_row_boko_haram_vader_no_zeros = q4_one_sentence_per_row_boko_haram.vader[q4_one_sentence_per_row_boko_haram['vader'] != 0]
q4_one_sentence_per_row_boko_haram_vader_no_zeros.describe()
count 500.000000 mean -0.521087 std 0.432834 min -0.969400 25% -0.849050 50% -0.670500 75% -0.334550 max 0.784100 Name: vader, dtype: float64
# Median nonzero Q4 Boko Haram score; axis=0 is the Series default, so it is omitted.
q4_one_sentence_per_row_boko_haram_vader_no_zeros.median()
-0.6705
# Q1 Islamic State sentences; one regex covers the three aliases instead of
# three chained str.contains calls.  VADER score 0 = no sentiment detected.
q1_one_sentence_per_row_isis = q1_one_sentence_per_row[q1_one_sentence_per_row.text.str.contains(r"islamic state|isis|isil")].copy()
q1_one_sentence_per_row_isis_vader_no_zeros = q1_one_sentence_per_row_isis.vader[q1_one_sentence_per_row_isis['vader'] != 0]
q1_one_sentence_per_row_isis_vader_no_zeros.describe()
count 144.000000 mean -0.569021 std 0.339280 min -0.955200 25% -0.812600 50% -0.642750 75% -0.401900 max 0.784500 Name: vader, dtype: float64
# Median nonzero Q1 Islamic State score; axis=0 is the Series default, so it is omitted.
q1_one_sentence_per_row_isis_vader_no_zeros.median()
-0.6427499999999999
# Q2 Islamic State sentences; one regex covers the three aliases instead of
# three chained str.contains calls.  VADER score 0 = no sentiment detected.
q2_one_sentence_per_row_isis = q2_one_sentence_per_row[q2_one_sentence_per_row.text.str.contains(r"islamic state|isis|isil")].copy()
q2_one_sentence_per_row_isis_vader_no_zeros = q2_one_sentence_per_row_isis.vader[q2_one_sentence_per_row_isis['vader'] != 0]
q2_one_sentence_per_row_isis_vader_no_zeros.describe()
count 268.000000 mean -0.581148 std 0.369688 min -0.974100 25% -0.831600 50% -0.690800 75% -0.440400 max 0.954500 Name: vader, dtype: float64
# Median nonzero Q2 Islamic State score; axis=0 is the Series default, so it is omitted.
q2_one_sentence_per_row_isis_vader_no_zeros.median()
-0.6908
# Q3 Islamic State sentences; one regex covers the three aliases instead of
# three chained str.contains calls.  VADER score 0 = no sentiment detected.
q3_one_sentence_per_row_isis = q3_one_sentence_per_row[q3_one_sentence_per_row.text.str.contains(r"islamic state|isis|isil")].copy()
q3_one_sentence_per_row_isis_vader_no_zeros = q3_one_sentence_per_row_isis.vader[q3_one_sentence_per_row_isis['vader'] != 0]
q3_one_sentence_per_row_isis_vader_no_zeros.describe()
count 285.000000 mean -0.595236 std 0.352697 min -0.961200 25% -0.840200 50% -0.700300 75% -0.476700 max 0.784500 Name: vader, dtype: float64
# Median nonzero Q3 Islamic State score; axis=0 is the Series default, so it is omitted.
q3_one_sentence_per_row_isis_vader_no_zeros.median()
-0.7003
# Q4 Islamic State sentences; one regex covers the three aliases instead of
# three chained str.contains calls.  VADER score 0 = no sentiment detected.
q4_one_sentence_per_row_isis = q4_one_sentence_per_row[q4_one_sentence_per_row.text.str.contains(r"islamic state|isis|isil")].copy()
q4_one_sentence_per_row_isis_vader_no_zeros = q4_one_sentence_per_row_isis.vader[q4_one_sentence_per_row_isis['vader'] != 0]
q4_one_sentence_per_row_isis_vader_no_zeros.describe()
count 275.000000 mean -0.544879 std 0.363350 min -0.963300 25% -0.824800 50% -0.659700 75% -0.371500 max 0.680800 Name: vader, dtype: float64
# Median nonzero Q4 Islamic State score; axis=0 is the Series default, so it is omitted.
q4_one_sentence_per_row_isis_vader_no_zeros.median()
-0.6597
# Q1 sentences mentioning safety; VADER score 0 = no sentiment detected,
# so exact zeros are excluded before summarizing.
q1_one_sentence_per_row_safety = q1_one_sentence_per_row[q1_one_sentence_per_row.text.str.contains("safety")].copy()
q1_one_sentence_per_row_safety_vader_no_zeros = q1_one_sentence_per_row_safety.vader[q1_one_sentence_per_row_safety['vader'] != 0]
q1_one_sentence_per_row_safety_vader_no_zeros.describe()
count 56.000000 mean 0.182095 std 0.553988 min -0.831600 25% -0.371375 50% 0.411700 75% 0.642600 max 0.946000 Name: vader, dtype: float64
# Median nonzero Q1 safety score; axis=0 is the Series default, so it is omitted.
q1_one_sentence_per_row_safety_vader_no_zeros.median()
0.41169999999999995
# Q2 sentences mentioning safety; VADER score 0 = no sentiment detected,
# so exact zeros are excluded before summarizing.
q2_one_sentence_per_row_safety = q2_one_sentence_per_row[q2_one_sentence_per_row.text.str.contains("safety")].copy()
q2_one_sentence_per_row_safety_vader_no_zeros = q2_one_sentence_per_row_safety.vader[q2_one_sentence_per_row_safety['vader'] != 0]
q2_one_sentence_per_row_safety_vader_no_zeros.describe()
count 115.000000 mean 0.152760 std 0.519354 min -0.859100 25% -0.282150 50% 0.318200 75% 0.571900 max 0.877900 Name: vader, dtype: float64
# Median nonzero Q2 safety score; axis=0 is the Series default, so it is omitted.
q2_one_sentence_per_row_safety_vader_no_zeros.median()
0.3182
# Q3 sentences mentioning safety; VADER score 0 = no sentiment detected,
# so exact zeros are excluded before summarizing.
q3_one_sentence_per_row_safety = q3_one_sentence_per_row[q3_one_sentence_per_row.text.str.contains("safety")].copy()
q3_one_sentence_per_row_safety_vader_no_zeros = q3_one_sentence_per_row_safety.vader[q3_one_sentence_per_row_safety['vader'] != 0]
q3_one_sentence_per_row_safety_vader_no_zeros.describe()
count 116.000000 mean 0.171347 std 0.469071 min -0.855500 25% -0.124975 50% 0.381800 75% 0.518525 max 0.913600 Name: vader, dtype: float64
# Median nonzero Q3 safety score; axis=0 is the Series default, so it is omitted.
q3_one_sentence_per_row_safety_vader_no_zeros.median()
0.3818
# Q4 sentences mentioning safety; VADER score 0 = no sentiment detected,
# so exact zeros are excluded before summarizing.
q4_one_sentence_per_row_safety = q4_one_sentence_per_row[q4_one_sentence_per_row.text.str.contains("safety")].copy()
q4_one_sentence_per_row_safety_vader_no_zeros = q4_one_sentence_per_row_safety.vader[q4_one_sentence_per_row_safety['vader'] != 0]
q4_one_sentence_per_row_safety_vader_no_zeros.describe()
count 180.000000 mean 0.085909 std 0.516771 min -0.918600 25% -0.453850 50% 0.214300 75% 0.421500 max 0.915300 Name: vader, dtype: float64
# Median nonzero Q4 safety score; axis=0 is the Series default, so it is omitted.
q4_one_sentence_per_row_safety_vader_no_zeros.median()
0.2143
# Q1 sentences mentioning violence; VADER score 0 = no sentiment detected,
# so exact zeros are excluded before summarizing.
q1_one_sentence_per_row_violence = q1_one_sentence_per_row[q1_one_sentence_per_row.text.str.contains("violence")].copy()
q1_one_sentence_per_row_violence_vader_no_zeros = q1_one_sentence_per_row_violence.vader[q1_one_sentence_per_row_violence['vader'] != 0]
q1_one_sentence_per_row_violence_vader_no_zeros.describe()
count 132.000000 mean -0.649552 std 0.305687 min -0.967000 25% -0.827100 50% -0.718400 75% -0.624900 max 0.844000 Name: vader, dtype: float64
# Median nonzero Q1 violence score; axis=0 is the Series default, so it is omitted.
q1_one_sentence_per_row_violence_vader_no_zeros.median()
-0.7184
# Q2 sentences mentioning violence; VADER score 0 = no sentiment detected,
# so exact zeros are excluded before summarizing.
q2_one_sentence_per_row_violence = q2_one_sentence_per_row[q2_one_sentence_per_row.text.str.contains("violence")].copy()
q2_one_sentence_per_row_violence_vader_no_zeros = q2_one_sentence_per_row_violence.vader[q2_one_sentence_per_row_violence['vader'] != 0]
q2_one_sentence_per_row_violence_vader_no_zeros.describe()
count 188.000000 mean -0.702273 std 0.232224 min -0.983200 25% -0.862500 50% -0.735100 75% -0.624900 max 0.401900 Name: vader, dtype: float64
# Median nonzero Q2 violence score; axis=0 is the Series default, so it is omitted.
q2_one_sentence_per_row_violence_vader_no_zeros.median()
-0.7351
# Q3 sentences mentioning violence; VADER score 0 = no sentiment detected,
# so exact zeros are excluded before summarizing.
q3_one_sentence_per_row_violence = q3_one_sentence_per_row[q3_one_sentence_per_row.text.str.contains("violence")].copy()
q3_one_sentence_per_row_violence_vader_no_zeros = q3_one_sentence_per_row_violence.vader[q3_one_sentence_per_row_violence['vader'] != 0]
q3_one_sentence_per_row_violence_vader_no_zeros.describe()
count 185.000000 mean -0.669562 std 0.322621 min -0.989300 25% -0.862500 50% -0.771700 75% -0.612400 max 0.767400 Name: vader, dtype: float64
# Median nonzero Q3 violence score; axis=0 is the Series default, so it is omitted.
q3_one_sentence_per_row_violence_vader_no_zeros.median()
-0.7717
# Q4 sentences mentioning violence; VADER score 0 = no sentiment detected,
# so exact zeros are excluded before summarizing.
q4_one_sentence_per_row_violence = q4_one_sentence_per_row[q4_one_sentence_per_row.text.str.contains("violence")].copy()
q4_one_sentence_per_row_violence_vader_no_zeros = q4_one_sentence_per_row_violence.vader[q4_one_sentence_per_row_violence['vader'] != 0]
q4_one_sentence_per_row_violence_vader_no_zeros.describe()
count 176.000000 mean -0.692290 std 0.303607 min -0.963300 25% -0.886000 50% -0.802000 75% -0.624900 max 0.891000 Name: vader, dtype: float64
# Median nonzero Q4 violence score; axis=0 is the Series default, so it is omitted.
q4_one_sentence_per_row_violence_vader_no_zeros.median()
-0.802
See the Portfolio-Nigeria_News_Sentiment_Analysis-Viz.html file, which is also in the folder "nlp-topic_modeling_&_sentiment_analysis-nigeria_news-2019" in my GitHub "portfolio" repository: