Another Copy of Ensemble Models Original Paid
Another Copy of Ensemble Models Original Paid
import pandas as pd
import matplotlib.pyplot as plt
Mounted at /content/drive
# Read the data, add column names, and merge everything into a single dataset
(1266, 16)
(1266, 16)
(1283, 16)
(3815, 16)
POF context
0 0 a news conference
1 61 comments on ABC's This Week.
2 1 a radio show
3 7 a web video
4 0 a campaign website
text
0 jobs wisconsin is on pace to double the number...
1 military,veterans,voting-record says john mcca...
2 medicare,message-machine-2012,campaign-adverti...
3 campaign-finance,legal-issues,campaign-adverti...
4 federal-budget,pensions,retirement over the pa...
# Remove punctuation (except commas), links, and extra whitespace.
# Preview the first rows of df.
print(df.head())
text
0 jobs wisconsin is on pace to double the number...
1 military,veterans,votingrecord says john mccai...
2 medicare,messagemachine2012,campaignadvertisin...
3 campaignfinance,legalissues,campaignadvertisin...
4 federalbudget,pensions,retirement over the pas...
True
text
0 [jobs, wisconsin, is, on, pace, to, double, th...
1 [military, veterans, votingrecord, says, john,...
2 [medicare, messagemachine2012, campaignadverti...
3 [campaignfinance, legalissues, campaignadverti...
4 [federalbudget, pensions, retirement, over, th...
text
0 [job, wisconsin, is, on, pace, to, double, the...
1 [military, veteran, votingrecord, say, john, m...
2 [medicare, messagemachine2012, campaignadverti...
3 [campaignfinance, legalissues, campaignadverti...
4 [federalbudget, pension, retirement, over, the...
text
0 [job, wisconsin, pace, double, number, layoff,...
1 [military, veteran, votingrecord, say, john, m...
2 [medicare, messagemachine2012, campaignadverti...
3 [campaignfinance, legalissues, campaignadverti...
4 [federalbudget, pension, retirement, past, fiv...
#Visualization of data
#Running PCA
from sklearn.decomposition import PCA

# Work on a copy and densify it with .toarray() before handing it to PCA.
text_vector_tfidf_copy = text_vector_tfidf.copy()
pca = PCA(n_components=2)
# Keep the projection: one row per input row, one column per component.
tfidf_pca_points = pca.fit_transform(text_vector_tfidf_copy.toarray())

# Scatter the projected rows. pca.components_ holds the per-feature loadings
# of each principal axis, so plotting it does not show the data in PCA space.
plt.figure(figsize=(10, 7))
plt.scatter(tfidf_pca_points[:, 0], tfidf_pca_points[:, 1])
plt.xlabel('First Principal Component')
plt.ylabel('Second Principal Component')
plt.title('PCA on tfidf vector')
plt.show()
# Work on a copy and densify it with .toarray() before handing it to PCA.
text_vector_bow_copy = text_vector_bow.copy()
pca = PCA(n_components=2)
# Keep the projection: one row per input row, one column per component.
bow_pca_points = pca.fit_transform(text_vector_bow_copy.toarray())

# Scatter the projected rows. pca.components_ holds the per-feature loadings
# of each principal axis, so plotting it does not show the data in PCA space.
plt.figure(figsize=(10, 7))
plt.scatter(bow_pca_points[:, 0], bow_pca_points[:, 1])
plt.xlabel('First Principal Component')
plt.ylabel('Second Principal Component')
plt.title('PCA on bow vector')
plt.show()
from sklearn.manifold import TSNE
import seaborn as sns

# t-SNE is stochastic; a fixed random_state makes the figure reproducible
# across runs.
tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300, random_state=0)

# Densify a copy of the TF-IDF matrix and embed it in two dimensions.
# The name is rebound to the embedding, as in the original cell.
tsne_text_vector_tfidf_tsne_copy = text_vector_tfidf.copy()
tsne_text_vector_tfidf_tsne_copy = tsne.fit_transform(
    tsne_text_vector_tfidf_tsne_copy.toarray()
)

# Colour each embedded row by its label from df.
plt.figure(figsize=(10, 7))
sns.scatterplot(
    x=tsne_text_vector_tfidf_tsne_copy[:, 0],
    y=tsne_text_vector_tfidf_tsne_copy[:, 1],
    hue=df['label'],
)
plt.title('TSNE on tfidf vector')
plt.show()
# Densify both text representations.
text_vector_bow = text_vector_bow.toarray()
text_vector_tfidf = text_vector_tfidf.toarray()

# Extract label / speaker / Party from df as single-column (n, 1) arrays.
label_vector = df['label'].values.reshape(-1, 1)
speaker_vector = df['speaker'].values.reshape(-1, 1)
party_vector = df['Party'].values.reshape(-1, 1)
(3815, 2001)
(3815, 2001)
(3815, 2003)
(3815, 2003)
# Wrap the four feature matrices in DataFrames.
dataF1 = pd.DataFrame(dataF1)
dataF2 = pd.DataFrame(dataF2)
dataF3 = pd.DataFrame(dataF3)
dataF4 = pd.DataFrame(dataF4)
print(dataF1.head())

# Positional columns 0..1999 are renamed to tfidf<i> / bow<i>; column 2000
# becomes 'label'; in dataF3 / dataF4 columns 2001 and 2002 become
# 'speaker' and 'party'.
N_TEXT_FEATURES = 2000
tfidf_names = {i: 'tfidf' + str(i) for i in range(N_TEXT_FEATURES)}
bow_names = {i: 'bow' + str(i) for i in range(N_TEXT_FEATURES)}
label_name = {N_TEXT_FEATURES: 'label'}
meta_names = {
    N_TEXT_FEATURES: 'label',
    N_TEXT_FEATURES + 1: 'speaker',
    N_TEXT_FEATURES + 2: 'party',
}

# One rename per frame with a complete mapping, instead of thousands of
# single-column in-place renames that each rebuild the column index.
dataF1 = dataF1.rename(columns={**tfidf_names, **label_name})
dataF2 = dataF2.rename(columns={**bow_names, **label_name})
dataF3 = dataF3.rename(columns={**tfidf_names, **meta_names})
dataF4 = dataF4.rename(columns={**bow_names, **meta_names})
from sklearn.preprocessing import LabelEncoder

# One encoder per (frame, column). Refitting a single encoder on 'party'
# after 'speaker' would discard the speaker mapping, making it impossible to
# inverse_transform or consistently encode speakers later.
speaker_le3 = LabelEncoder()
speaker_le4 = LabelEncoder()
# le3 / le4 end up fitted on 'party', exactly as before this change.
le3 = LabelEncoder()
le4 = LabelEncoder()

dataF3['speaker'] = speaker_le3.fit_transform(dataF3['speaker'])
dataF4['speaker'] = speaker_le4.fit_transform(dataF4['speaker'])
dataF3['party'] = le3.fit_transform(dataF3['party'])
dataF4['party'] = le4.fit_transform(dataF4['party'])

print(dataF1.head())
print(dataF3.head())
speaker party
0 570 5
1 269 15
2 865 15
3 944 5
4 113 15
Predictions: ['1' '1' '1' '1' '0' '0' '1' '1' '1' '0' '1' '0' '1' '1'
'1' '0' '1' '0'
'0' '0' '1' '0' '1' '0' '1' '1' '1' '0' '1' '1' '1' '1' '1' '0' '0'
'0'
'0' '1' '1' '1' '0' '0' '1' '0' '1' '1' '1' '0' '1' '0' '1' '0' '1'
'0'
'1' '0' '0' '1' '1' '1' '1' '0' '1' '0' '1' '1' '1' '0' '1' '1' '0'
'1'
'1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '0' '0' '0' '1' '0' '0'
'0'
'1' '0' '0' '0' '0' '1' '0' '1' '0' '1' '1' '0' '1' '0' '1' '1' '1'
'1'
'0' '0' '0' '0' '0' '0' '1' '0' '1' '1' '1' '1' '0' '1' '0' '0' '0'
'0'
'1' '1' '1' '0' '1' '0' '1' '1' '1' '1' '1' '0' '1' '1' '0' '1' '0'
'0'
'0' '0' '0' '1' '0' '0' '0' '1' '0' '1' '0' '1' '1' '0' '1' '1' '1'
'0'
'1' '0' '0' '1' '1' '0' '0' '1' '0' '0' '0' '0' '1' '1' '0' '1' '0'
'1'
'1' '0' '0' '1' '1' '1' '1' '1' '0' '1' '1' '0' '0' '0' '1' '1' '0'
'0'
'1' '0' '1' '1' '1' '0' '0' '1' '0' '1' '0' '0' '0' '0' '0' '0' '0'
'1'
'0' '1' '1' '0' '0' '0' '0' '0' '1' '1' '0' '0' '1' '0' '0' '1' '0'
'1'
'1' '1' '0' '1' '1' '1' '1' '0' '1' '0' '0' '0' '0' '0' '0' '1' '1'
'1'
'1' '1' '1' '1' '0' '1' '1' '0' '1' '1' '0' '0' '1' '1' '0' '0' '1'
'1'
'1' '1' '1' '0' '1' '0' '0' '1' '0' '0' '0' '1' '0' '0' '0' '0' '1'
'1'
'1' '0' '1' '1' '0' '0' '0' '1' '0' '0' '0' '1' '0' '0' '0' '1' '1'
'1'
'1' '1' '1' '1' '0' '1' '0' '1' '0' '0' '1' '0' '1' '1' '0' '0' '1'
'1'
'1' '0' '1' '1' '1' '0' '0' '1' '0' '1' '0' '1' '1' '1' '0' '1' '0'
'1'
'1' '0' '0' '1' '0' '0' '0' '0' '1' '0' '1' '1' '1' '1' '0' '1' '0'
'0'
'1' '1' '0' '1' '1' '1' '0' '1' '0' '1' '1' '0' '0' '1' '1' '0' '1'
'0'
'0' '0' '1' '0']
First prediction: 1
The news is true.
Class labels: ['0' '1']
# Check the truth value of the first element based on the model's class mapping:
# '0' is reported as fake, '1' as true, anything else as unexpected.
first_prediction = predictions[0]
verdict = (
    'The news is fake.' if first_prediction == '0'
    else 'The news is true.' if first_prediction == '1'
    else 'Unexpected prediction value.'
)
print(verdict)

# Alternatively, you can check the class labels used in training
class_labels = loaded_model.classes_
print("Class labels:", class_labels)
Predictions: ['1' '0' '1' '1' '0' '0' '0' '0' '1' '0' '1' '1' '1' '0'
'1' '0' '1' '0'
'0' '1' '1' '0' '1' '0' '1' '1' '0' '0' '0' '1' '0' '1' '1' '0' '0'
'1'
'0' '0' '1' '0' '0' '0' '1' '1' '1' '0' '1' '0' '1' '0' '0' '0' '1'
'0'
'1' '0' '0' '0' '0' '1' '1' '0' '0' '0' '0' '1' '1' '0' '0' '1' '0'
'1'
'0' '1' '1' '0' '1' '0' '0' '1' '1' '0' '0' '0' '0' '1' '1' '0' '0'
'0'
'1' '0' '0' '1' '1' '1' '0' '1' '1' '1' '1' '0' '0' '0' '1' '1' '0'
'1'
'0' '0' '0' '0' '0' '0' '1' '0' '1' '1' '0' '1' '1' '1' '0' '0' '1'
'0'
'1' '0' '0' '0' '0' '0' '1' '1' '1' '1' '1' '1' '0' '1' '0' '1' '0'
'0'
'0' '0' '0' '1' '0' '0' '0' '0' '1' '0' '0' '0' '1' '0' '0' '0' '1'
'0'
'1' '0' '0' '1' '1' '0' '0' '1' '0' '0' '1' '0' '1' '1' '0' '0' '0'
'1'
'1' '0' '1' '1' '1' '1' '1' '0' '0' '0' '1' '1' '1' '0' '1' '0' '0'
'1'
'0' '1' '1' '1' '0' '0' '0' '1' '0' '1' '0' '0' '0' '1' '0' '0' '0'
'1'
'0' '1' '1' '0' '0' '0' '0' '0' '0' '1' '0' '0' '1' '0' '0' '1' '0'
'1'
'0' '1' '1' '1' '1' '1' '1' '0' '1' '0' '0' '0' '0' '1' '1' '1' '1'
'1'
'0' '1' '0' '1' '0' '1' '0' '0' '0' '0' '0' '1' '1' '0' '0' '0' '0'
'0'
'0' '0' '1' '0' '1' '0' '0' '1' '1' '0' '0' '1' '0' '0' '0' '0' '1'
'1'
'1' '0' '1' '1' '0' '0' '0' '1' '0' '0' '0' '1' '0' '0' '0' '1' '0'
'1'
'1' '1' '1' '1' '0' '1' '0' '1' '0' '0' '1' '0' '0' '1' '0' '0' '0'
'1'
'1' '0' '1' '1' '1' '0' '0' '1' '0' '1' '0' '0' '0' '0' '1' '1' '1'
'1'
'1' '1' '1' '1' '0' '0' '0' '0' '1' '0' '1' '0' '0' '0' '0' '0' '1'
'1'
'1' '0' '1' '0' '1' '1' '0' '1' '0' '0' '1' '0' '0' '0' '1' '0' '1'
'0'
'0' '1' '1' '0']
First prediction: 1
The news is true.
Class labels: ['0' '1']
# Check the truth value of the first element based on the model's class mapping:
# '0' is reported as fake, '1' as true, anything else as unexpected.
first_prediction = predictions[0]
verdict = (
    'The news is fake.' if first_prediction == '0'
    else 'The news is true.' if first_prediction == '1'
    else 'Unexpected prediction value.'
)
print(verdict)
Predictions: ['1' '1' '1' '0' '0' '0' '1' '1' '1' '0' '1' '0' '0' '1'
'1' '0' '1' '1'
'1' '0' '1' '1' '1' '0' '1' '1' '1' '0' '1' '1' '1' '1' '1' '0' '0'
'0'
'1' '1' '1' '1' '0' '0' '0' '0' '1' '1' '0' '0' '0' '0' '0' '0' '1'
'1'
'1' '0' '1' '1' '1' '0' '1' '0' '1' '0' '0' '1' '1' '0' '1' '0' '0'
'1'
'1' '1' '1' '1' '1' '0' '1' '1' '0' '1' '1' '1' '0' '1' '1' '1' '0'
'0'
'1' '1' '1' '0' '0' '1' '1' '1' '1' '1' '1' '0' '1' '1' '1' '1' '1'
'0'
'0' '0' '0' '1' '0' '0' '0' '0' '0' '1' '1' '1' '0' '0' '0' '0' '1'
'0'
'1' '1' '1' '0' '0' '1' '0' '1' '1' '1' '0' '1' '1' '1' '0' '1' '1'
'1'
'0' '0' '0' '1' '0' '0' '0' '1' '0' '1' '0' '1' '1' '1' '1' '1' '1'
'0'
'1' '1' '0' '1' '1' '0' '0' '1' '0' '0' '1' '0' '1' '1' '0' '0' '0'
'1'
'1' '0' '0' '1' '1' '0' '1' '0' '0' '1' '1' '0' '0' '1' '1' '1' '1'
'1'
'1' '0' '1' '1' '1' '1' '1' '1' '0' '1' '0' '1' '0' '0' '0' '0' '0'
'1'
'0' '0' '1' '0' '0' '0' '0' '0' '1' '0' '0' '0' '1' '0' '0' '1' '0'
'1'
'1' '1' '1' '0' '0' '1' '0' '0' '1' '0' '0' '1' '1' '1' '1' '0' '0'
'1'
'0' '1' '1' '1' '0' '0' '0' '1' '1' '1' '0' '0' '0' '0' '0' '1' '1'
'1'
'1' '1' '0' '0' '1' '0' '0' '1' '1' '1' '0' '1' '0' '0' '0' '0' '1'
'1'
'1' '0' '0' '1' '0' '0' '0' '1' '0' '0' '0' '1' '0' '0' '0' '1' '1'
'1'
'1' '1' '1' '0' '0' '1' '1' '1' '0' '1' '1' '0' '1' '1' '0' '0' '1'
'1'
'1' '0' '1' '1' '1' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '1' '1'
'1'
'1' '0' '0' '1' '0' '1' '0' '0' '1' '0' '0' '0' '1' '0' '0' '0' '1'
'0'
'0' '1' '1' '1' '1' '1' '0' '1' '1' '1' '1' '0' '0' '1' '1' '0' '1'
'0'
'0' '1' '0' '0']
First prediction: 1
The news is true.
Class labels: ['0' '1']
# 4. Random Forest Model
----------------------------------------------------------------------
-----
KeyboardInterrupt Traceback (most recent call
last)
<ipython-input-55-5542e53a1d77> in <cell line: 25>()
23
24 # Call the function and save the result
---> 25 random_forest_grid_learn(X_train4, y_train4, X_val4, y_val4)
26
<ipython-input-55-5542e53a1d77> in random_forest_grid_learn(X_train,
y_train, X_val, y_val)
16 'min_samples_leaf': [1, 2, 4]}
17 grid_search = GridSearchCV(rf, param_grid, cv=3,
refit=True, n_jobs=-1, scoring='accuracy', verbose=1)
---> 18 grid_search.fit(X_train, y_train)
19 # Save the trained model
20 Pkl_Filename = "Pickle_RandomForest_Model.pkl"
/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_searc
h.py in fit(self, X, y, groups, **fit_params)
872 return results
873
--> 874 self._run_search(evaluate_candidates)
875
876 # multimetric is determined here because in the
case of a callable
/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_searc
h.py in _run_search(self, evaluate_candidates)
1386 def _run_search(self, evaluate_candidates):
1387 """Search all candidates in param_grid"""
-> 1388 evaluate_candidates(ParameterGrid(self.param_grid))
1389
1390
/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_searc
h.py in evaluate_candidates(candidate_params, cv, more_results)
819 )
820
--> 821 out = parallel(
822 delayed(_fit_and_score)(
823 clone(base_estimator),
/usr/local/lib/python3.10/dist-packages/sklearn/utils/parallel.py in
__call__(self, iterable)
61 for delayed_func, args, kwargs in iterable
62 )
---> 63 return super().__call__(iterable_with_config)
64
65
/usr/local/lib/python3.10/dist-packages/joblib/parallel.py in
__call__(self, iterable)
1950 next(output)
1951
-> 1952 return output if self.return_generator else
list(output)
1953
1954 def __repr__(self):
/usr/local/lib/python3.10/dist-packages/joblib/parallel.py in
_get_outputs(self, iterator, pre_dispatch)
1593
1594 with self._backend.retrieval_context():
-> 1595 yield from self._retrieve()
1596
1597 except GeneratorExit:
/usr/local/lib/python3.10/dist-packages/joblib/parallel.py in
_retrieve(self)
1705 (self._jobs[0].get_status(
1706 timeout=self.timeout) == TASK_PENDING)):
-> 1707 time.sleep(0.01)
1708 continue
1709
KeyboardInterrupt:
# Check the truth value of the first element based on the model's class mapping:
# '0' is reported as fake, '1' as true, anything else as unexpected.
first_prediction = predictions[0]
verdict = (
    'The news is fake.' if first_prediction == '0'
    else 'The news is true.' if first_prediction == '1'
    else 'Unexpected prediction value.'
)
print(verdict)
# 5. AdaBoost Model
# Check the truth value of the first element based on the model's class mapping:
# '0' is reported as fake, '1' as true, anything else as unexpected.
first_prediction = predictions[0]
verdict = (
    'The news is fake.' if first_prediction == '0'
    else 'The news is true.' if first_prediction == '1'
    else 'Unexpected prediction value.'
)
print(verdict)
----------------------------------------------------------------------
-----
KeyboardInterrupt Traceback (most recent call
last)
<ipython-input-56-4120cac957bd> in <cell line: 24>()
22
23 # Call the function and save the result
---> 24 svm_grid_learn(X_train4, y_train4, X_val4, y_val4)
/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_searc
h.py in fit(self, X, y, groups, **fit_params)
872 return results
873
--> 874 self._run_search(evaluate_candidates)
875
876 # multimetric is determined here because in the
case of a callable
/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_searc
h.py in _run_search(self, evaluate_candidates)
1386 def _run_search(self, evaluate_candidates):
1387 """Search all candidates in param_grid"""
-> 1388 evaluate_candidates(ParameterGrid(self.param_grid))
1389
1390
/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_searc
h.py in evaluate_candidates(candidate_params, cv, more_results)
819 )
820
--> 821 out = parallel(
822 delayed(_fit_and_score)(
823 clone(base_estimator),
/usr/local/lib/python3.10/dist-packages/sklearn/utils/parallel.py in
__call__(self, iterable)
61 for delayed_func, args, kwargs in iterable
62 )
---> 63 return super().__call__(iterable_with_config)
64
65
/usr/local/lib/python3.10/dist-packages/joblib/parallel.py in
__call__(self, iterable)
1950 next(output)
1951
-> 1952 return output if self.return_generator else
list(output)
1953
1954 def __repr__(self):
/usr/local/lib/python3.10/dist-packages/joblib/parallel.py in
_get_outputs(self, iterator, pre_dispatch)
1593
1594 with self._backend.retrieval_context():
-> 1595 yield from self._retrieve()
1596
1597 except GeneratorExit:
/usr/local/lib/python3.10/dist-packages/joblib/parallel.py in
_retrieve(self)
1705 (self._jobs[0].get_status(
1706 timeout=self.timeout) == TASK_PENDING)):
-> 1707 time.sleep(0.01)
1708 continue
1709
KeyboardInterrupt:
# Check the truth value of the first element based on the model's class mapping:
# '0' is reported as fake, '1' as true, anything else as unexpected.
first_prediction = predictions[0]
verdict = (
    'The news is fake.' if first_prediction == '0'
    else 'The news is true.' if first_prediction == '1'
    else 'Unexpected prediction value.'
)
print(verdict)
# 7. MLP Model
# Check the truth value of the first element based on the model's class mapping:
# '0' is reported as fake, '1' as true, anything else as unexpected.
first_prediction = predictions[0]
verdict = (
    'The news is fake.' if first_prediction == '0'
    else 'The news is true.' if first_prediction == '1'
    else 'Unexpected prediction value.'
)
print(verdict)
# MLP model: a seeded estimator plus the grid of values to search over
# (three hidden-layer layouts and three alpha values).
mlp_model = MLPClassifier(random_state=0)
param_grid = dict(
    hidden_layer_sizes=[(100,), (50, 50), (50, 30, 10)],
    alpha=[0.0001, 0.001, 0.01],
)
# Check the truth value of the first element based on the model's class mapping:
# '0' is reported as fake, '1' as true, anything else as unexpected.
first_prediction = predictions[0]
verdict = (
    'The news is fake.' if first_prediction == '0'
    else 'The news is true.' if first_prediction == '1'
    else 'Unexpected prediction value.'
)
print(verdict)
# MLP model: a seeded estimator plus the grid of values to search over
# (three hidden-layer layouts and three alpha values).
mlp_model = MLPClassifier(random_state=0)
param_grid = dict(
    hidden_layer_sizes=[(100,), (50, 50), (50, 30, 10)],
    alpha=[0.0001, 0.001, 0.01],
)
# Check the truth value of the first element based on the model's class mapping:
# '0' is reported as fake, '1' as true, anything else as unexpected.
first_prediction = predictions[0]
verdict = (
    'The news is fake.' if first_prediction == '0'
    else 'The news is true.' if first_prediction == '1'
    else 'Unexpected prediction value.'
)
print(verdict)
import pickle
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import learning_curve
import matplotlib.pyplot as plt
# Check the truth value of the first element based on the ensemble model's
# class mapping: '0' is reported as fake, '1' as true, anything else as unexpected.
first_ensemble_prediction = predictions_ensemble[0]
ensemble_verdict = (
    'The news is fake according to the ensemble model.'
    if first_ensemble_prediction == '0'
    else 'The news is true according to the ensemble model.'
    if first_ensemble_prediction == '1'
    else 'Unexpected prediction value for the ensemble model.'
)
print(ensemble_verdict)