# Word2Vec code
# Hyperparameters
# Module-level constants, grouped by what their names suggest they configure.
# The code that consumes them is not part of this section.

# Input sizing (sequence length, vocabulary cap, embedding width).
MAX_NB_WORDS = 20_000
MAX_SEQ_LENGTH = 100
EMBEDDING_DIM = 100

# Layer sizing.
NUM_FILTERS = 128
FILTER_SIZES = [3, 4, 5]
LSTM_UNITS = 64
DENSE_UNITS = 1
DROPOUT_RATE = 0.5

# Training schedule.
EPOCHS = 50
PATIENCE = 10

# Progress marker so the notebook output shows this cell has run.
print("config done")
# load data
# Read the Sentiment140 training CSV. The file is read with header=None, so
# the six column names are supplied here; only the two columns used below are
# parsed, which avoids materialising the unused ones and yields a fresh
# DataFrame (no chained-assignment warning on the column update that follows).
df = pd.read_csv(
    "/kaggle/input/sentiment140/training.1600000.processed.noemoticon.csv",
    encoding="latin-1",
    header=None,
    names=["sentiment", "id", "date", "query", "user", "text"],
    usecols=["sentiment", "text"],
)

# Replace the raw codes with readable names (0 -> "negative", 4 -> "positive").
# Any other code is left unchanged by replace().
df["sentiment"] = df["sentiment"].replace({0: "negative", 4: "positive"})

texts = df["text"].values

# Binary targets: 1 where the sentiment is "positive", 0 for everything else.
labels = (df["sentiment"] == "positive").to_numpy().astype(int)

print(df.head(10))
# Plot accuracy
# Draw the 'accuracy' and 'val_accuracy' series from history.history on one
# figure, in that order, so the legend entries below line up with the curves.
# NOTE(review): `history` is not defined in this section -- presumably the
# object returned by the model's fit call; confirm against the training code.
for metric_key in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[metric_key])

# Axis labels and title.
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Model Accuracy')

# Legend labels are matched to the curves by plotting order.
curve_labels = ['Train', 'Validation']
plt.legend(curve_labels, loc='upper left')
plt.show()