4.10. Text Data Pre-Processing - Use Case - Ipynb - Colaboratory
4.10. Text Data Pre-Processing - Use Case - Ipynb - Colaboratory
import numpy as np
1 1 FLYNN: Hillary Clinton, Big Woman on Campus - ... Daniel J. Flynn Ever get the feeling your life circles the rou... 0 Danie
import pandas as pd
2 2 Why the Truth Might Get You Fired Consortiumnews.com Why the Truth Might Get You Fired October 29, ... 1 Consortium
import re
import nltk
3 3 15 Civilians Killed In Single US Airstrike Hav... Jessica Purkiss Videos 15 Civilians Killed In Single US Airstr... 1 Jes
# separating feature and target
from nltk.corpus import stopwords
X = news_data.drop(columns='label', axis =1)
from nltk.stem.porter import PorterStemmer
4 4 Iranian woman jailed for fictional unpublished... Howard Portnoy Print \nAn Iranian woman has been sentenced to... 1 Howa
Y = news_data['label']
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
print(X)
3 3 15 Civilians Killed In Single US Airstrike Hav... Jessica Purkiss Videos 15 Civilians Killed In Single US Airstr... 1
4 4 Iranian woman jailed for fictional unpublished... Howard Portnoy Print \nAn Iranian woman has been sentenced to... 1
Stemming:
Stemming is the process of reducing a word to its root word.
print(X)
['darrel lucu hous dem aid even see comey letter jason chaffetz tweet'
'daniel j flynn flynn hillari clinton big woman campu breitbart'
'consortiumnew com truth might get fire' ...
'michael j de la merc rachel abram maci said receiv takeov approach hudson bay new york time'
'alex ansari nato russia hold parallel exercis balkan'
'david swanson keep f aliv']
print(Y)
[1 0 1 ... 0 1 1]
Y.shape
(20800,)
X = vectorizer.transform(X)
print(X)