Reg. No.: 39110009 Colab Notebook Link: Name: Abivirshan Suresh
Reg. No.: 39110009 Colab Notebook Link: Name: Abivirshan Suresh
Add the cleaned tweets (after removing URLs and mentions) to a new column
named ‘new’.
Remove hyperlinks, Twitter marks and styles
Tokenization — Tokenize the given Strings
Stemming - Reducing the Size of vocabulary.
from google.colab import files
uploaded=files.upload()
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
# Read the tweet data set and discard the "id" and "label" columns, which the
# text-cleaning steps below do not use.
df = pd.read_csv("tweet.csv").drop(columns=["id", "label"])
df.head()
tweet
def remove_pattern(input_txt, pattern):
    """Return ``input_txt`` with every match of the regex ``pattern`` removed.

    Args:
        input_txt: The text to clean.
        pattern: A regular expression (string or compiled pattern); each
            non-overlapping match is replaced by the empty string.

    Returns:
        The cleaned text. Text without any match is returned unchanged.
    """
    # Substitute with the pattern itself rather than re-using each matched
    # string as a new regex: matched text may contain regex metacharacters
    # (e.g. "c++"), and a short match such as "@user" would otherwise also
    # eat the prefix of a longer one such as "@username".
    return re.sub(pattern, '', input_txt)
# Strip Twitter mentions ("@" followed by word characters) from every tweet
# and store the result in the new column 'new'.
# The pattern is a raw string so that "\w" reaches the regex engine as-is
# instead of being treated as an (invalid) string escape sequence.
df['new'] = np.vectorize(remove_pattern)(df['tweet'], r"@[\w]*")
df.head()
tweet new
0 @user when a father is dysfunctional and is s... when a father is dysfunctional and is so sel...
1 @user @user thanks for #lyft credit i can't us... thanks for #lyft credit i can't use cause th...
3 #model i love u take with u all the time in ... #model i love u take with u all the time in ...
Removing links
# Drop every whitespace-separated token that looks like a link, then re-join
# the remaining words with single spaces.
# A token counts as a link when it contains ".com" (the original rule) or
# starts with a common URL prefix, so links on other domains are removed too.
_LINK_PREFIXES = ('http://', 'https://', 'www.')
df['new'] = df['new'].apply(
    lambda text: ' '.join(
        word for word in text.split()
        if '.com' not in word and not word.lower().startswith(_LINK_PREFIXES)
    )
)
df.head()
tweet new
0 @user when a father is dysfunctional and is s... when a father is dysfunctional and is so selfi...
1 @user @user thanks for #lyft credit i can't us... thanks for #lyft credit i can't use cause they...
3 #model i love u take with u all the time in ... #model i love u take with u all the time in ur...
# Replace every character that is not an ASCII letter (digits, punctuation,
# '#', apostrophes, ...) with a space.
# regex=True is required: since pandas 2.0 Series.str.replace treats the
# pattern as a literal string by default, which would leave the text unchanged.
df['new'] = df['new'].str.replace(r"[^a-zA-Z]", " ", regex=True)
df.head()
tweet new
0 @user when a father is dysfunctional and is s... when a father is dysfunctional and is so selfi...
Tokenization
1 @user @user thanks for #lyft credit i can't us... thanks for lyft credit i can t use cause they...
3
import nltk
#model i love u take with u all the time in ... model i love u take with u all the time in ur...
nltk.download('punkt')
from nltk.tokenize import word_tokenize

# Lower-case each cleaned tweet and split it into word tokens.
# `tf` is created here because no earlier definition is visible in this
# notebook; it holds the token lists in a single 'tokens' column, indexed like
# `df`.
# NOTE(review): if `tf` was meant to be created in an earlier cell, confirm it
# carried no other columns.
tf = pd.DataFrame(
    {'tokens': df['new'].apply(lambda text: word_tokenize(text.lower()))}
)
tf.head()
tokens
Stemming
from nltk.stem import PorterStemmer

stemmer = PorterStemmer()
# Replace every token in each row's token list with its Porter stem.
tf['tokens'] = tf['tokens'].apply(lambda words: list(map(stemmer.stem, words)))
tf.head()
tokens
Lemmatization
import nltk
nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()
# Lemmatize every token of every row. The result must be assigned back:
# Series.apply returns a new Series and leaves tf['tokens'] untouched.
tf['tokens'] = tf['tokens'].apply(
    lambda words: [lemmatizer.lemmatize(word) for word in words]
)
tf.head()
tokens
Removal of Stopwords
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords

# A set gives constant-time membership tests while filtering.
stop_words = set(stopwords.words('english'))
# Keep only the tokens that are not English stop words.
tf['tokens'] = tf['tokens'].apply(
    lambda words: list(filter(lambda word: word not in stop_words, words))
)
tf.head()
tokens
2 [bihday, majesti]
# Discard tokens of three characters or fewer and collapse each row's
# remaining tokens back into one space-separated string.
tf['tokens'] = tf['tokens'].apply(
    lambda words: ' '.join(filter(lambda word: len(word) > 3, words))
)
tf.head()
tokens
tf.head()
2 bihday majesti
2 bihday majesti
from collections import Counter
from nltk.tokenize import word_tokenize

# Flatten all rows into one list holding every token of the corpus.
tokens = []
for text in tf['tokens']:
    tokens.extend(word_tokenize(text))
print(tokens)

# Count every token once up front; calling tokens.count() per distinct word
# rescans the whole list each time and is quadratic in the corpus size.
token_counts = Counter(tokens)

# mpw ("most popular words"): every word occurring more than 500 times.
mpw = []
for word, count in token_counts.items():
    if count > 500:
        mpw.append(word)
        print(word, count)
thank 1580
posit 994
bihday 889
smile 930
feel 774
father 957
girl 651
work 803
time 1265
love 3245
healthi 611
need 661
come 642
great 537
take 740
happi 2106
follow 528
week 607
live 591
make 992
best 521
summer 591
friend 760
good 892
like 1249
bull 506
weekend 627
peopl 895
famili 623
life 1176
today 1105
want 779
wait 658
look 730
year 555
friday 540
beauti 663
# Bar chart of the frequent words: one bar per word in mpw, with the word's
# number of occurrences in `tokens` as the bar height.
plt.bar(mpw, list(map(tokens.count, mpw)))
VISHWAJEET ANAND
40731127
In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
In [2]:
# Load the data set and split it into a feature matrix X (every column except
# the last) and a target vector y (the last column), both as NumPy arrays.
dataset = pd.read_csv('Social_Network_Ads.csv')
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values
In [3]:
K-Nearest Neighbor(KNN)
In [4]:
Naive Bayes
In [5]:
Decision Tree
In [6]:
Logistic Regression
In [7]:
In [8]:
In [11]:
The accuracy score achieved using Support Vector Machine is: 89.0%
The accuracy score achieved using Gradient Boosting for Regression is: 71.6
7%
Graph visualization
In [12]:
# Bar chart comparing the accuracy score of each algorithm.
sns.set(rc={'figure.figsize': (20, 7)})
plt.xlabel('Algorithms')
plt.ylabel("Accuracy Scores (in %)")
# x and y are passed by keyword: positional use raises a FutureWarning on
# seaborn < 0.12 and is no longer accepted from 0.12 onwards, where `data` is
# the only positional argument.
sns.barplot(x=algorithms, y=score)
C:\Users\VISHWaAJEET\anaconda3\lib\site-packages\seaborn\_decorators.py:36:
FutureWarning: Pass the following variables as keyword args: x, y. From vers
ion 0.12, the only valid positional argument will be `data`, and passing oth
er arguments without an explicit keyword will result in an error or misinter
pretation.
warnings.warn(
Out[12]:
VISHWAJEET ANAND
40731127
In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
In [2]:
# Load the data set and split it into a feature matrix X (every column except
# the last) and a target vector y (the last column), both as NumPy arrays.
dataset = pd.read_csv('Salary_Data.csv')
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values
In [3]:
In [4]:
Out[4]:
LinearRegression()
Making predictions
In [5]:
y_pred = regressor.predict(X_test)
In [13]:
In [7]:
[4.83847542e+08 4.46019390e+08]]
Choose Files no files selected Upload widget is only available when the cell has been
executed in the current browser session. Please rerun this cell to enable.
Saving housing_price.csv to housing_price.csv
import pandas as pd
import numpy as np
df=pd.read_csv('housing_price.csv')
https://colab.research.google.com/drive/1iHoMz00HMxm-PJfBnTy-Bxvao2hVsdbl?usp=sharing#scrollTo=oTjEENKtrkY2 Page 1 of 11
39110009 AbiVirshan S.ipynb - Colaboratory 07/01/2022, 11:31 PM
print(df.shape,df.size,df.ndim)
Suburb 0
Address 0
Rooms 0
Type 0
Price 7610
Method 0
SellerG 0
Date 0
Distance 1
Postcode 1
Bedroom2 8217
Bathroom 8226
Car 8728
Landsize 11810
BuildingArea 21115
YearBuilt 19306
CouncilArea 3
Lattitude 7976
Longtitude 7976
Regionname 3
Propertycount 3
dtype: int64
https://colab.research.google.com/drive/1iHoMz00HMxm-PJfBnTy-Bxvao2hVsdbl?usp=sharing#scrollTo=oTjEENKtrkY2 Page 2 of 11
39110009 AbiVirshan S.ipynb - Colaboratory 07/01/2022, 11:31 PM
df.columns
21
df.head(10)
68 Studley
0 Abbotsford 2 h NaN SS Jellis 3/09/2016
St
85 Turner
1 Abbotsford 2 h 1480000.0 S Biggin 3/12/2016
St
25
2 Abbotsford Bloomburg 2 h 1035000.0 S Biggin 4/02/2016
St
18/659
3 Abbotsford 3 u NaN VB Rounds 4/02/2016
Victoria St
5 Charles
4 Abbotsford 3 h 1465000.0 SP Biggin 4/03/2017
St
40
5 Abbotsford Federation 3 h 850000.0 PI Biggin 4/03/2017
La
55a Park
6 Abbotsford 4 h 1600000.0 VB Nelson 4/06/2016
St
16 Maugie
7 Abbotsford 4 h NaN SN Nelson 6/08/2016
St
53 Turner
8 Abbotsford 2 h NaN S Biggin 6/08/2016
St
99 Turner
9 Abbotsford 2 h NaN S Collins 6/08/2016
St
https://colab.research.google.com/drive/1iHoMz00HMxm-PJfBnTy-Bxvao2hVsdbl?usp=sharing#scrollTo=oTjEENKtrkY2 Page 3 of 11
39110009 AbiVirshan S.ipynb - Colaboratory 07/01/2022, 11:31 PM
df.isnull().sum()
Suburb 0
Address 0
Rooms 0
Type 0
Price 7610
Method 0
SellerG 0
Date 0
Distance 78
Postcode 1
Bedroom2 8234
Bathroom 8272
Car 10359
Landsize 14247
BuildingArea 21191
YearBuilt 19306
CouncilArea 3
Lattitude 7976
Longtitude 7976
Regionname 3
Propertycount 3
dtype: int64
https://colab.research.google.com/drive/1iHoMz00HMxm-PJfBnTy-Bxvao2hVsdbl?usp=sharing#scrollTo=oTjEENKtrkY2 Page 4 of 11
39110009 AbiVirshan S.ipynb - Colaboratory 07/01/2022, 11:31 PM
df.head(10)
68 Studley
0 Abbotsford 2 h NaN SS Jellis 3/09/2016
St
85 Turner
1 Abbotsford 2 h 1480000.0 S Biggin 3/12/2016
St
25
2 Abbotsford Bloomburg 2 h 1035000.0 S Biggin 4/02/2016
St
18/659
3 Abbotsford 3 u NaN VB Rounds 4/02/2016
Victoria St
5 Charles
4 Abbotsford 3 h 1465000.0 SP Biggin 4/03/2017
St
40
5 Abbotsford Federation 3 h 850000.0 PI Biggin 4/03/2017
La
55a Park
6 Abbotsford 4 h 1600000.0 VB Nelson 4/06/2016
St
16 Maugie
7 Abbotsford 4 h NaN SN Nelson 6/08/2016
St
53 Turner
8 Abbotsford 2 h NaN S Biggin 6/08/2016
St
99 Turner
9 Abbotsford 2 h NaN S Collins 6/08/2016
St
https://colab.research.google.com/drive/1iHoMz00HMxm-PJfBnTy-Bxvao2hVsdbl?usp=sharing#scrollTo=oTjEENKtrkY2 Page 5 of 11
39110009 AbiVirshan S.ipynb - Colaboratory 07/01/2022, 11:31 PM
df.isnull().sum()
Suburb 0
Address 0
Rooms 0
Type 0
Price 2185
Method 0
SellerG 0
Date 0
Distance 0
Postcode 0
Bedroom2 4
Bathroom 0
Car 630
Landsize 2091
BuildingArea 1255
YearBuilt 284
CouncilArea 0
Lattitude 0
Longtitude 0
Regionname 0
Propertycount 0
dtype: int64
https://colab.research.google.com/drive/1iHoMz00HMxm-PJfBnTy-Bxvao2hVsdbl?usp=sharing#scrollTo=oTjEENKtrkY2 Page 6 of 11
39110009 AbiVirshan S.ipynb - Colaboratory 07/01/2022, 11:31 PM
Suburb 0
Address 0
Rooms 0
Type 0
Price 0
Method 0
SellerG 0
Date 0
Distance 0
Postcode 0
Bedroom2 0
Bathroom 0
Car 0
Landsize 0
BuildingArea 0
YearBuilt 0
CouncilArea 0
Lattitude 0
Longtitude 0
Regionname 0
Propertycount 0
dtype: int64
df=df.dropna()
https://colab.research.google.com/drive/1iHoMz00HMxm-PJfBnTy-Bxvao2hVsdbl?usp=sharing#scrollTo=oTjEENKtrkY2 Page 7 of 11
39110009 AbiVirshan S.ipynb - Colaboratory 07/01/2022, 11:31 PM
df.isnull().sum()
Suburb 0
Address 0
Rooms 0
Type 0
Price 0
Method 0
SellerG 0
Date 0
Distance 0
Postcode 0
Bedroom2 0
Bathroom 0
Car 0
Landsize 0
BuildingArea 0
YearBuilt 0
CouncilArea 0
Lattitude 0
Longtitude 0
Regionname 0
Propertycount 0
dtype: int64
df.dtypes.value_counts()
float64 12
object 8
int64 1
dtype: int64
# Report the frame's shape, its total number of cells and its number of
# dimensions.
print(df.shape,df.size,df.ndim)
# Features: every column except the last one.
x=df.iloc[:,:-1]
# Target: the last column of the frame.
# NOTE(review): the recorded output of this notebook shows the selected target
# Series is named "Propertycount", and "Price" therefore ends up among the
# features. For a housing-price model the target is presumably meant to be
# "Price" -- confirm before training on this split.
y=df.iloc[:,-1]
https://colab.research.google.com/drive/1iHoMz00HMxm-PJfBnTy-Bxvao2hVsdbl?usp=sharing#scrollTo=oTjEENKtrkY2 Page 8 of 11
39110009 AbiVirshan S.ipynb - Colaboratory 07/01/2022, 11:31 PM
25
2 Abbotsford Bloomburg 2 h 1.035000e+06 S Biggin 4/02/2016
St
16 Maugie
7 Abbotsford 4 h 1.094868e+06 SN Nelson 6/08/2016
St
35
34849 Wollert Kingscote 3 h 5.700000e+05 SP RW 24/02/2018
Wy
15
34850 Wollert Rockgarden 3 h 1.094868e+06 SP LJ 24/02/2018
Wy
29A Murray
34853 Yarraville 2 h 8.880000e+05 SP Sweeney 24/02/2018
St
147A
34854 Yarraville 2 t 7.050000e+05 S Jas 24/02/2018
Severn St
3
34856 Yarraville Tarrengower 2 h 1.020000e+06 PI RW 24/02/2018
St
https://colab.research.google.com/drive/1iHoMz00HMxm-PJfBnTy-Bxvao2hVsdbl?usp=sharing#scrollTo=oTjEENKtrkY2 Page 9 of 11
39110009 AbiVirshan S.ipynb - Colaboratory 07/01/2022, 11:31 PM
2 4019.0
4 4019.0
6 4019.0
7 4019.0
11 4019.0
...
34849 2940.0
34850 2940.0
34853 6543.0
34854 6543.0
34856 6543.0
Name: Propertycount, Length: 13772, dtype: float64
https://colab.research.google.com/drive/1iHoMz00HMxm-PJfBnTy-Bxvao2hVsdbl?usp=sharing#scrollTo=oTjEENKtrkY2 Page 10 of 11
39110009 AbiVirshan S.ipynb - Colaboratory 07/01/2022, 11:31 PM
https://colab.research.google.com/drive/1iHoMz00HMxm-PJfBnTy-Bxvao2hVsdbl?usp=sharing#scrollTo=oTjEENKtrkY2 Page 11 of 11
3/17/22, 8:26 PM ML Lab confusion matrix.ipynb - Colaboratory
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
# Load the data: the columns at positions 2 and 3 are the features, the last
# column is the class label.
social = pd.read_csv("Social_Network_Ads.csv")
X = social.iloc[:, [2, 3]].values
y = social.iloc[:, -1].values

# Hold out 20% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

# Standardize the features. The scaler is fitted on the training split only
# and then re-used, unchanged, on the test split.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Linear-kernel support vector classifier.
svc = SVC(kernel='linear', C=10.0, random_state=1).fit(X_train, y_train)
SVC(C=10.0, kernel='linear', random_state=1)
# Predict the held-out labels and tabulate them against the true labels.
y_pred = svc.predict(X_test)
conf_matrix = confusion_matrix(y_test, y_pred)

# Draw the matrix as a lightly shaded image on a 5x5 inch figure.
fig, ax = plt.subplots(figsize=(5, 5))
ax.matshow(conf_matrix, cmap=plt.cm.Oranges, alpha=0.3)
https://colab.research.google.com/drive/1ufH6lKaXaUpJnLqEti65Bfw5iOjUopv2#scrollTo=ie-IQCP0XxjV&printMode=true 1/3
3/17/22, 8:26 PM ML Lab confusion matrix.ipynb - Colaboratory
# Write each cell's count onto the confusion-matrix image (x = column index,
# y = row index).
n_rows, n_cols = conf_matrix.shape
for i in range(n_rows):
    for j in range(n_cols):
        ax.text(x=j, y=i, s=conf_matrix[i, j], va='center', size='xx-large')

# No line plot of y_test against y_pred is drawn here: on these axes it would
# only overlay meaningless segments between the matrix cells.
plt.xlabel('Predictions', fontsize=18)
plt.ylabel('Actuals', fontsize=18)
plt.title('Confusion Matrix', fontsize=18)
plt.show()

# Summary metrics for the predictions on the test split, each printed once
# with three decimals.
print('Precision: %.3f' % precision_score(y_test, y_pred))
print('Recall: %.3f' % recall_score(y_test, y_pred))
print('Accuracy: %.3f' % accuracy_score(y_test, y_pred))
print('f1_score: %.3f' % f1_score(y_test, y_pred))
Precision: 1.000
Recall: 0.645
Accuracy: 0.862500
fl_score: 0.784
https://colab.research.google.com/drive/1ufH6lKaXaUpJnLqEti65Bfw5iOjUopv2#scrollTo=ie-IQCP0XxjV&printMode=true 3/3