Sentimen2.ipynb - Colaboratory
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords

from numpy import array
from keras.preprocessing.text import one_hot, Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Activation, Dropout, Dense, Flatten, GlobalMaxPooling1D, Embedding
from sklearn.model_selection import train_test_split
# Load the dataset and check it for missing values
data = pd.read_excel('/content/drive/MyDrive/sentiment.xlsx')
data.isnull().values.any()
data.shape
(1005, 2)
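If the null check above ever returns True, the offending rows can be dropped before going further; a minimal sketch (this cleanup step is an addition, not part of the original notebook):

# Hypothetical cleanup: drop rows with missing values and reindex
data = data.dropna().reset_index(drop=True)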
# Display the first 5 rows
data.head()
content sentiment
# Example: display the 'content' attribute of record 3
data["content"][3]
# Seaborn is a data-visualization library; plot the class distribution
import seaborn as sns
sns.countplot(x='sentiment', data=data)
<matplotlib.axes._subplots.AxesSubplot at 0x7f6880c4a210>
# Text preprocessing
def preprocess_text(sen):
    # Remove HTML tags (remove_tags is defined in the next cell)
    sentence = remove_tags(sen)

    # Remove punctuation and numbers
    sentence = re.sub('[^a-zA-Z]', ' ', sentence)

    # Remove stray single characters left over after cleaning
    sentence = re.sub(r"\s+[a-zA-Z]\s+", ' ', sentence)

    # Collapse multiple spaces into one
    sentence = re.sub(r'\s+', ' ', sentence)

    return sentence
# A regular expression is a sequence of characters used to search strings
# or text by pattern; TAG_RE matches HTML tags such as <br/>
import re

TAG_RE = re.compile(r'<[^>]+>')

def remove_tags(text):
    return TAG_RE.sub('', text)
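A quick sanity check of the two helpers on a made-up review (an illustrative addition, not part of the original notebook):

# remove_tags strips HTML; preprocess_text also drops digits, punctuation,
# stray single letters, and repeated spaces
print(remove_tags("Aplikasi <b>bagus</b>"))         # Aplikasi bagus
print(preprocess_text("Bagus 100%!<br/>Puas x 5"))  # 'Bagus Puas '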
# Build the list of preprocessed review texts
X = []
sentences = list(data['content'])
for sen in sentences:
    X.append(preprocess_text(sen))
X[0]
'Saya baru kemaren instal aplikasi ini langsung Bisa dibuka Tapi hari ini buka lagi ko g…
…ncul layar putih Ditunggu lama banget sampe saya uinstall lalu instal lagi tetep aja kek…
…iperbaiki dong Ga malah memudahkan malah ribet jadinya Kita komplain juga ga malah dibal…'
# Build the label array: 1 for "positif", 0 for everything else
y = data['sentiment']
y = np.array(list(map(lambda x: 1 if x == "positif" else 0, y)))
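A quick look at the resulting class balance (an added check; the label names follow the mapping above):

# Counts of class 0 ("negatif"/other) and class 1 ("positif")
print(np.unique(y, return_counts=True))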
# Split the data into 80% training and 20% testing sets
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)
print('Data Train : ', X_train)
print('Data Test : ', X_test)
Data Train : ['Sangat membantu dalam pembelian tiket kereta api tanpa harus antri di lo…
Data Test : ['mau daftar susah ga dapet verifikasi ', 'Kak ini gimna pesen tiket udah d…
# Tokenization splits text into tokens (characters, words, or sentences);
# here each word is mapped to an integer index from the training vocabulary
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(X_train)

X_train = tokenizer.texts_to_sequences(X_train)
X_test = tokenizer.texts_to_sequences(X_test)
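To see what the fitted tokenizer does, a toy call (the indices below are illustrative; the actual values depend on the fitted vocabulary):

# Each known word becomes its integer index; out-of-vocabulary words are dropped
print(tokenizer.texts_to_sequences(["aplikasi bagus"]))  # e.g. [[12, 87]]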
# Adding 1 because index 0 is reserved for padding
vocab_size = len(tokenizer.word_index) + 1

maxlen = 100
X_train = pad_sequences(X_train, padding='post', maxlen=maxlen)
X_test = pad_sequences(X_test, padding='post', maxlen=maxlen)
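An illustration of what post-padding does (illustrative values, not from the dataset):

# Shorter sequences are filled with zeros at the end up to maxlen
print(pad_sequences([[5, 3]], padding='post', maxlen=6))  # [[5 3 0 0 0 0]]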
from numpy import array, asarray, zeros

# Load the pre-trained GloVe word vectors into a dictionary (note: glove.6B
# is trained on English text, so Indonesian words missing from it will later
# fall back to zero vectors in the embedding matrix)
embeddings_dictionary = dict()
glove_file = open('/content/drive/MyDrive/glove.6B.100d.txt', encoding="utf8")
for line in glove_file:
    records = line.split()
    word = records[0]
    vector_dimensions = asarray(records[1:], dtype='float32')
    embeddings_dictionary[word] = vector_dimensions
glove_file.close()
# Build the embedding matrix: row i holds the 100-dimensional GloVe vector
# of the word with index i; words not found in GloVe stay all-zero
embedding_matrix = zeros((vocab_size, 100))
for word, index in tokenizer.word_index.items():
    embedding_vector = embeddings_dictionary.get(word)
    if embedding_vector is not None:
        embedding_matrix[index] = embedding_vector
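A coverage check (an added diagnostic, not in the original notebook) shows how much of the vocabulary actually received a GloVe vector; since the reviews are Indonesian and glove.6B is English, the hit rate is expected to be low:

# Count vocabulary words that have a pre-trained vector
hits = sum(1 for w in tokenizer.word_index if w in embeddings_dictionary)
print(f"{hits} of {len(tokenizer.word_index)} words found in GloVe")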
# Declarations for the CNN model
from keras.models import Model
from keras.layers import Conv1D, Dense, MaxPool1D, Flatten, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model

import numpy as np
model = Sequential()
# Build the CNN model: GloVe-initialized embedding, one convolution layer,
# global max pooling, and a sigmoid output for binary classification
embedding_layer = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=maxlen)
model.add(embedding_layer)
model.add(Conv1D(128, 5, activation='relu'))
model.add(GlobalMaxPooling1D())
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
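As a shape sanity check (an added sketch; the dummy batch is made up), the flow is (batch, 100) token indices → (batch, 100, 100) embeddings → (batch, 96, 128) convolution features → (batch, 128) pooled features → (batch, 1) sigmoid probability:

# Run two fake padded sequences through the untrained pipeline
dummy = np.random.randint(1, vocab_size, size=(2, maxlen))
print(model.predict(dummy).shape)  # (2, 1)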
import os
os.mkdir("/content/simpan_model")
# Display the built model
model.summary()
plot_model(model, to_file='model.png', show_shapes=True, show_layer_names=False, rankdir='TB')
Model: "sequential_18"
(layer summary: Embedding → Conv1D → GlobalMaxPooling1D → Dense; the full
parameter table did not survive this printout)
# Train the model and save it
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), batch_size=128, epochs=10)
model.save("mymodel.h5")
Epoch 1/10
…
Epoch 10/10
# Evaluate the model on the training data
loss, acc = model.evaluate(X_train, y_train)
print('Train Accuracy: %f' % (acc * 100))
# Evaluate the model on the test data
loss, acc = model.evaluate(X_test, y_test, verbose=1)
print('Test Accuracy: %f' % (acc * 100))
# Evaluating the model's accuracy and loss on the test set
cnn_score = model.evaluate(X_test, y_test, verbose=1)
print("\nTEST SCORES OF MODEL")
print("Test loss:", cnn_score[0])
print("Test Accuracy:", cnn_score[1])
# Final results
print("Convolutional Neural Network")
print("Training Accuracy : {}".format(history.history['acc']))
print("Validation Accuracy : {}".format(history.history['val_acc']))
print("\nTraining Loss : {}".format(history.history['loss']))
print("Validation Loss : {}".format(history.history['val_loss']))
# Visualize the training results
import matplotlib.pyplot as plt

plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
# Raw sigmoid probabilities for the test set
y_pred = model.predict(X_test)
y_pred
array([[1.0196775e-02],
       [1.1258881e-04],
       [3.4761429e-04],
       ...,
       [5.7262852e-05],
       [2.0313615e-05]])
(output truncated in this printout)
# Threshold the sigmoid outputs at 0.5 to obtain binary class labels
# (np.argmax over X_test would only pick the position of the largest token
# index in each padded sequence, not a sentiment, so the model's own
# predictions are thresholded instead)
y_pred = (model.predict(X_test) > 0.5).astype(int).ravel()
y_pred
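A quick distribution check (an added line, not in the original notebook) confirms the thresholded predictions cover both classes:

# Label counts: predicted vs. true on the test set
print("predicted:", np.bincount(y_pred))
print("true     :", np.bincount(y_test))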
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
# Confusion matrix and test-set scores
loss, acc = model.evaluate(X_test, y_test, verbose=0)
cm = confusion_matrix(y_test, y_pred)
print("Accuracy : {:.3f}%".format(acc * 100))
print("Loss : {:.3f}".format(loss))

print("\nClassification Report :\n")
ax = sns.heatmap(cm, annot=True, cmap='Blues', fmt='d')
ax.set_title('Confusion Matrix')
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')
Accuracy : 100.000%
Loss : 0.010
Classification Report :
print(classification_report(y_test, y_pred))
(three truncated sklearn UndefinedMetricWarning messages from
_classification.py:1308: some metrics are ill-defined for labels with no
predicted samples)
# Get input from the user
print("\nMAKING PREDICTION ON A SINGLE INSTANCE USING THE MODEL THAT WE HAVE BUILT")
reviewNo = int(input("\nEnter the review number whose sentiment is to be predicted: "))
reviewText = X[reviewNo]
print("\nCONTENT OF THE REVIEW CHOSEN IS")
print(reviewText)

# Converting the text to numeric form using the tokenizer built earlier.
# texts_to_sequences expects a list of texts; feeding it a single string makes
# it treat every character as a separate text, so the per-character results
# need to be flattened back into one sequence afterwards.
reviewProcessed = tokenizer.texts_to_sequences(reviewText)
print("\nINTEGER SEQUENCE OF THE REVIEW CHOSEN IS")
print(reviewProcessed)

# Flatten the nested lists into a single list of word indices
flat_list = []
for sublist in reviewProcessed:
    for item in sublist:
        flat_list.append(item)

# Making the entire items a single-element batch
flat_list = [flat_list]

# Padding to fit into the model
reviewSequence = pad_sequences(flat_list, padding='post', maxlen=maxlen)
MAKING PREDICTION ON A SINGLE INSTANCE USING THE MODEL THAT WE HAVE BUILT
Udah punya akun mau login kalimatnya aduh timeout Lah ini kai access atau lagu dangdut K…
[[], [2201], [], [], [], [1424], [], [], [], [], [], [], [], [], [], [], [], [], [], [], …
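The cell above stops at the padded sequence; scoring it with the trained model takes one more step (a minimal sketch; the 0.5 threshold and label strings are assumptions consistent with the mapping used earlier):

# Sigmoid probability of the positive class, thresholded at 0.5
prob = model.predict(reviewSequence)[0][0]
print("Predicted sentiment:", "positif" if prob > 0.5 else "negatif")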