# gpt-2 code

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
from sklearn.metrics import confusion_matrix
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import (Input, Conv1D, MaxPooling1D, GRU,
                                     Dense, Dropout, Concatenate)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from transformers import GPT2Tokenizer, GPT2Model

print('import done')

# Define hyperparameters
MAX_SEQ_LENGTH = 100
GPT2_HIDDEN_SIZE = 768        # hidden size of the pretrained "gpt2" checkpoint
FILTER_SIZES = [3, 5, 7]
NUM_FILTERS = 256
GRU_UNITS = 256
DENSE_UNITS = 1
DROPOUT_RATE = 0.8
print('config done')

# Load the dataset
df = pd.read_csv("/kaggle/input/sentiment140/training.1600000.processed.noemoticon.csv",
                 encoding='latin-1', header=None)
df.columns = ["sentiment", "id", "date", "query", "user", "text"]
df = df[["sentiment", "text"]]
df["sentiment"] = df["sentiment"].replace({0: "negative", 4: "positive"})
# Sentiment140 is ordered by label (all negatives first, then positives),
# so shuffle; otherwise validation_split below would hold out only one class.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)
texts = df["text"].values
labels = df["sentiment"].values
labels = np.array([1 if label == "positive" else 0 for label in labels])
print(df.head(10))

# Preprocess text data: use a frozen pretrained GPT-2 as the feature extractor
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
gpt_model = GPT2Model.from_pretrained("gpt2").to(device)
gpt_model.eval()  # inference only; disables dropout inside GPT-2

def extract_gpt_features(text):
    # Tokenize and truncate to MAX_SEQ_LENGTH tokens so no sequence
    # exceeds the length the classifier below is padded to.
    inputs = tokenizer.encode_plus(text, add_special_tokens=True,
                                   truncation=True, max_length=MAX_SEQ_LENGTH,
                                   return_tensors="pt")
    inputs = inputs.to(device)

    with torch.no_grad():
        outputs = gpt_model(**inputs)[0]  # (1, seq_len, 768) hidden states

    # Keep the full per-token sequence (not just the first token) so the
    # Conv1D/GRU layers below have a real time axis to work over.
    features = outputs.squeeze(0).cpu().numpy()  # (seq_len, 768)
    return features

# Keep this as a list: per-tweet lengths differ, so it is ragged until padded.
# Calling GPT-2 once per tweet over 1.6M rows is slow; see the batched sketch below.
sequences = [extract_gpt_features(text) for text in texts]
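
# Illustrative sketch, not part of the original script: a batched variant of
# the extractor above (the name extract_gpt_features_batched is ours). GPT-2
# has no pad token by default, so we reuse the EOS token for padding and strip
# the padded positions afterwards via the attention mask.
tokenizer.pad_token = tokenizer.eos_token

def extract_gpt_features_batched(batch_texts):
    enc = tokenizer(list(batch_texts), return_tensors="pt", padding=True,
                    truncation=True, max_length=MAX_SEQ_LENGTH).to(device)
    with torch.no_grad():
        hidden = gpt_model(**enc)[0]  # (batch, padded_len, 768)
    # GPT2Tokenizer pads on the right, so the real tokens of each example
    # are the first mask.sum() positions.
    return [h[:int(m.sum())].cpu().numpy()
            for h, m in zip(hidden, enc["attention_mask"])]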


# Pad/truncate the float feature sequences to a fixed length
data = pad_sequences(sequences, maxlen=MAX_SEQ_LENGTH, dtype='float32',
                     padding='post', truncating='post')  # (N, 100, 768)

# Define model architecture
# The GPT-2 hidden states already act as embeddings, so no Embedding layer
# (or word index) is needed; the input is the feature sequence itself.
inputs = Input(shape=(MAX_SEQ_LENGTH, GPT2_HIDDEN_SIZE))
conv_layers = []
for filter_size in FILTER_SIZES:
    conv = Conv1D(filters=NUM_FILTERS, kernel_size=filter_size,
                  activation='relu')(inputs)
    pool = MaxPooling1D(pool_size=MAX_SEQ_LENGTH - filter_size + 1)(conv)
    conv_layers.append(pool)
concat = Concatenate()(conv_layers)
gru = GRU(units=GRU_UNITS)(concat)
dropout = Dropout(rate=DROPOUT_RATE)(gru)
outputs = Dense(units=DENSE_UNITS, activation='sigmoid')(dropout)
model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
model.summary()
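
# Quick sanity check (illustrative, not in the original script): confirm the
# feature tensor matches the model's expected input shape before training.
print(data.shape)         # expected: (len(texts), MAX_SEQ_LENGTH, GPT2_HIDDEN_SIZE)
print(model.input_shape)  # expected: (None, MAX_SEQ_LENGTH, GPT2_HIDDEN_SIZE)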

# Train the model with early stopping
es = EarlyStopping(monitor='val_accuracy', patience=5, mode='max',
                   min_delta=0.01, baseline=0.85)
history = model.fit(data, labels, epochs=50, validation_split=0.3,
                    callbacks=[es])

# Plot accuracy
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()
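
# Optional addition (not in the original script): plot the loss curves the
# same way; 'loss' and 'val_loss' are always present in history.history.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()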

# Make predictions on new data
new_texts = ["is upset that he can't update his Facebook by texting it... and might cry as a result School today ...",
             "@Kenichan I dived many times for the ball. Managed to save 50% The rest go out of bounds",
             "my whole body feels itchy and like its on fire",
             "@nationwideclass no, it's not behaving at all. i'm mad. why am i here? because I can't see you all o...",
             "@Kwesidei not the whole crew",
             "@LettyA ahh ive always wanted to see rent love the soundtrack!!",
             "@FakerPattyPattz Oh dear. Were you drinking out of the forgotten table drinks? "]

new_sequences = [extract_gpt_features(text) for text in new_texts]


# Pad with the same settings as the training data
new_data = pad_sequences(new_sequences, maxlen=MAX_SEQ_LENGTH, dtype='float32',
                         padding='post', truncating='post')
predictions = model.predict(new_data)
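
# Illustrative addition (not in the original script): map the sigmoid scores
# to readable labels for a quick qualitative check of the predictions.
for text, score in zip(new_texts, predictions[:, 0]):
    label = "positive" if score >= 0.5 else "negative"
    print(f"{score:.3f} {label}: {text[:60]}")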

# Evaluate the model
y_pred = np.round(predictions).astype(int).flatten()  # (7,) class labels
y_true = np.array([0, 0, 0, 0, 0, 0, 0])  # True labels of the new texts
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])  # fix labels so cm is 2x2
print(cm)
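
# Optional fuller summary (illustrative, not in the original script); passing
# labels explicitly keeps it working even if only one class is predicted.
from sklearn.metrics import classification_report
print(classification_report(y_true, y_pred, labels=[0, 1],
                            target_names=["negative", "positive"],
                            zero_division=0))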
