NLP Lab Programs

1. Convert the text into tokens and find the word frequency

from collections import Counter
import re

def tokenize(text):
    tokens = re.findall(r'\b\w+\b', text.lower())
    return tokens

def word_frequency(tokens):
    frequency = Counter(tokens)
    return frequency

text = "This is a simple text. This text is for testing the word frequency program. This is simple."

tokens = tokenize(text)
frequency = word_frequency(tokens)

for word, count in frequency.items():
    print(f"{word}: {count}")
2. Perform Lemmatization and Stemming

import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer

nltk.download('punkt')
nltk.download('wordnet')
nltk.download('omw-1.4')

def tokenize(text):
    tokens = word_tokenize(text.lower())
    return tokens

def lemmatize(tokens):
    lemmatizer = WordNetLemmatizer()
    lemmatized_tokens = [lemmatizer.lemmatize(token) for token in tokens]
    return lemmatized_tokens

def stem(tokens):
    stemmer = PorterStemmer()
    stemmed_tokens = [stemmer.stem(token) for token in tokens]
    return stemmed_tokens

text = "The striped bats are hanging on their feet for best."

tokens = tokenize(text)

lemmatized_tokens = lemmatize(tokens)
print("Lemmatized Tokens:")
print(lemmatized_tokens)

stemmed_tokens = stem(tokens)
print("\nStemmed Tokens:")
print(stemmed_tokens)
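Note that WordNetLemmatizer.lemmatize() treats every token as a noun unless a part of speech is passed, which is why a form like "hanging" is left unchanged above. An optional sketch (not part of the original program) that lemmatizes the same tokens as verbs:

# Optional: lemmatize as verbs; e.g. "hanging" reduces to "hang" when pos='v' is given.
verb_lemmatizer = WordNetLemmatizer()
verb_lemmas = [verb_lemmatizer.lemmatize(token, pos='v') for token in tokens]
print(verb_lemmas)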
3. Implement Bi-gram

import nltk
from nltk.tokenize import word_tokenize
from nltk.util import bigrams

nltk.download('punkt')

def generate_bigrams(text):
    tokens = word_tokenize(text.lower())
    bigram_list = list(bigrams(tokens))
    return bigram_list

text = "The striped bats are hanging on their feet for best."

bigrams_result = generate_bigrams(text)

print("Bigrams:")
for bigram in bigrams_result:
    print(bigram)
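As an optional extension (a sketch, assuming the code above has already run), the bigrams can also be counted with collections.Counter:

from collections import Counter

# Optional: frequency of each bigram in the text.
bigram_counts = Counter(bigrams_result)
print(bigram_counts.most_common(3))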
4. Identify parts-of-speech using the Penn Treebank tag set

pip install nltk

import nltk
from nltk.corpus import treebank
from nltk.tokenize import word_tokenize
from nltk import pos_tag

nltk.download('averaged_perceptron_tagger')
nltk.download('punkt')

sentence = "The quick brown fox jumps over the lazy dog."

words = word_tokenize(sentence)

tagged_words = pos_tag(words)

print("Sentence:", sentence)
print("Tagged Words:", tagged_words)

POS tags defined in the Penn Treebank tag set:

1. CC     Coordinating conjunction
2. CD     Cardinal number
3. DT     Determiner
4. EX     Existential there
5. FW     Foreign word
6. IN     Preposition or subordinating conjunction
7. JJ     Adjective
8. JJR    Adjective, comparative
9. JJS    Adjective, superlative
10. LS    List item marker
11. MD    Modal
12. NN    Noun, singular or mass
13. NNS   Noun, plural
14. NNP   Proper noun, singular
15. NNPS  Proper noun, plural
16. PDT   Predeterminer
17. POS   Possessive ending
18. PRP   Personal pronoun
19. PRP$  Possessive pronoun
20. RB    Adverb
21. RBR   Adverb, comparative
22. RBS   Adverb, superlative
23. RP    Particle
24. SYM   Symbol
25. TO    to
26. UH    Interjection
27. VB    Verb, base form
28. VBD   Verb, past tense
29. VBG   Verb, gerund or present participle
30. VBN   Verb, past participle
31. VBP   Verb, non-3rd person singular present
32. VBZ   Verb, 3rd person singular present
33. WDT   Wh-determiner
34. WP    Wh-pronoun
35. WP$   Possessive wh-pronoun
36. WRB   Wh-adverb
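The same tag definitions are also available inside NLTK itself; a minimal sketch (requires the 'tagsets' resource, an extra download not used in the program above):

import nltk
nltk.download('tagsets')

nltk.help.upenn_tagset('NN')   # description and examples for a single tag
nltk.help.upenn_tagset()       # print the full Penn Treebank tag set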
5. Implement HMM for POS tagging and Build a Chunker
import nltk
from nltk.corpus import treebank
from collections import defaultdict

nltk.download('treebank')

tagged_sentences = treebank.tagged_sents()

states = set()
observations = set()

for sentence in tagged_sentences:
    for word, tag in sentence:
        states.add(tag)
        observations.add(word.lower())

states = list(states)
observations = list(observations)

transition_counts = defaultdict(lambda: defaultdict(int))


for sentence in tagged_sentences:
    prev_tag = "<s>"
    for word, tag in sentence:
        transition_counts[prev_tag][tag] += 1
        prev_tag = tag
    transition_counts[prev_tag]["</s>"] += 1

transition_probabilities = defaultdict(lambda: defaultdict(float))


for prev_tag, next_tags in transition_counts.items():
    total_count = sum(next_tags.values())
    for next_tag, count in next_tags.items():
        transition_probabilities[prev_tag][next_tag] = count / total_count

emission_counts = defaultdict(lambda: defaultdict(int))
for sentence in tagged_sentences:
    for word, tag in sentence:
        emission_counts[tag][word.lower()] += 1

emission_probabilities = defaultdict(lambda: defaultdict(float))


for tag, words in emission_counts.items():
    total_count = sum(words.values())
    for word, count in words.items():
        emission_probabilities[tag][word] = count / total_count

initial_counts = defaultdict(int)
for sentence in tagged_sentences:
    initial_counts[sentence[0][1]] += 1

initial_probabilities = defaultdict(float)
total_count = sum(initial_counts.values())
for tag, count in initial_counts.items():
    initial_probabilities[tag] = count / total_count

def viterbi(observations, states, start_p, trans_p, emit_p):
    V = [{}]
    path = {}

    for state in states:
        V[0][state] = start_p[state] * emit_p[state].get(observations[0], 0)
        path[state] = [state]

    for t in range(1, len(observations)):
        V.append({})
        newpath = {}

        for y in states:
            (prob, state) = max((V[t-1][y0] * trans_p[y0].get(y, 0) * emit_p[y].get(observations[t], 0), y0)
                                for y0 in states)
            V[t][y] = prob
            newpath[y] = path[state] + [y]

        path = newpath

    n = len(observations) - 1
    (prob, state) = max((V[n][y], y) for y in states)
    return (prob, path[state])

sentence = "the quick brown fox jumps over the lazy dog"
observations = sentence.lower().split()
prob, tags = viterbi(observations, states, initial_probabilities, transition_probabilities, emission_probabilities)
print("Sentence:", sentence)
print("Tags:", tags)

from nltk import pos_tag, word_tokenize
from nltk.chunk import RegexpParser

nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

def chunk_sentence(sentence, grammar):
    words = word_tokenize(sentence)
    tagged = pos_tag(words)
    parser = RegexpParser(grammar)
    chunks = parser.parse(tagged)
    return chunks

grammar = """
NP: {<DT|JJ|NN.*>+}          # Chunk sequences of DT, JJ, NN
VP: {<VB.*><NP|PP|CLAUSE>+$} # Chunk verbs and their arguments
PP: {<IN><NP>}               # Chunk prepositions followed by NP
CLAUSE: {<NP><VP>}           # Chunk NP, VP
"""

sentence = "The quick brown fox jumps over the lazy dog"
chunks = chunk_sentence(sentence, grammar)
print(chunks)
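The parse result is an nltk.Tree, so individual chunk types can be pulled out directly; an optional sketch (not part of the original program) listing only the NP chunks:

# Optional: print just the noun-phrase chunks from the parse tree.
for subtree in chunks.subtrees(filter=lambda t: t.label() == 'NP'):
    print(' '.join(word for word, tag in subtree.leaves()))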
6. Find the synonyms and antonyms of a word using WordNet

import nltk
from nltk.corpus import wordnet as wn

nltk.download('wordnet')
nltk.download('omw-1.4')

def get_synonyms(word):
    synonyms = set()
    for syn in wn.synsets(word):
        for lemma in syn.lemmas():
            synonyms.add(lemma.name())
    return synonyms

def get_antonyms(word):
    antonyms = set()
    for syn in wn.synsets(word):
        for lemma in syn.lemmas():
            if lemma.antonyms():
                antonyms.add(lemma.antonyms()[0].name())
    return antonyms

word = "happy"

synonyms = get_synonyms(word)
antonyms = get_antonyms(word)

print(f"Synonyms of '{word}':", synonyms)


print(f"Antonyms of '{word}':", antonyms)

7. Implement semantic role labeling to identify named entities (Same as Program-9)

Go to the Command Prompt.

Install Python 3.12.4.

Change to the folder that contains your programs.

C:\> pip install spacy

C:\> python -m spacy download en_core_web_sm

C:\> python --version

C:\> pip --version

C:\> pip show spacy

C:\> python -c "import spacy"    (checks that spaCy imports without errors)

C:\> pip install jupyter

C:\> jupyter notebook

• This will open the Jupyter environment.

• Create a file named "test_spacy.ipynb".

Paste the following code into a single cell:

import spacy

# Load the spaCy model
nlp = spacy.load("en_core_web_sm")

# Example text
text = "Barack Obama was born in Hawaii. He was elected president in 2008."

# Process the text
doc = nlp(text)

# Extract named entities
named_entities = [(ent.text, ent.label_) for ent in doc.ents]

# Print named entities
print("Named Entities:", named_entities)

Run the cell in Jupyter, or save the same code as test_spacy.py and run it from the command prompt:

C:\> python test_spacy.py
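As an optional check (a sketch, not part of the original program), the following lines can be appended to the same cell; spacy.explain gives a short description of each entity label:

# Optional: print a short description of each entity label (e.g. GPE, DATE).
for ent in doc.ents:
    print(ent.text, ent.label_, spacy.explain(ent.label_))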


8. Implement POS tagging using LSTM

!pip install tensorflow

import tensorflow as tf
print(tf.__version__)
!pip install numpy tensorflow keras scikit-learn nltk

import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding, TimeDistributed
from sklearn.model_selection import train_test_split
import nltk
from nltk.corpus import treebank
from nltk.tokenize import word_tokenize

nltk.download('treebank')
nltk.download('punkt')

def load_data():
    sentences = treebank.sents()
    tags = treebank.tagged_sents()

    words = [word.lower() for sentence in sentences for word in sentence]
    tags_flattened = [tag for sentence in tags for _, tag in sentence]

    unique_words = sorted(set(words))
    unique_tags = sorted(set(tags_flattened))

    word_to_index = {word: i + 2 for i, word in enumerate(unique_words)}
    tag_to_index = {tag: i + 1 for i, tag in enumerate(unique_tags)}

    word_to_index['<PAD>'] = 0
    word_to_index['<UNK>'] = 1
    tag_to_index['<PAD>'] = 0

    index_to_word = {i: word for word, i in word_to_index.items()}
    index_to_tag = {i: tag for tag, i in tag_to_index.items()}

    # Lowercase the lookup as well, so it matches the lowercased vocabulary built above.
    X = [[word_to_index.get(word.lower(), 1) for word in sentence] for sentence in sentences]
    y = [[tag_to_index[tag] for word, tag in sentence] for sentence in tags]

    max_len = max(len(sentence) for sentence in X)
    X = pad_sequences(X, maxlen=max_len, padding='post')
    y = pad_sequences(y, maxlen=max_len, padding='post')
    y = to_categorical(y, num_classes=len(tag_to_index))

    return X, y, word_to_index, tag_to_index, index_to_word, index_to_tag

# Load data and mappings
X, y, word_to_index, tag_to_index, index_to_word, index_to_tag = load_data()

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = Sequential()
model.add(Embedding(input_dim=len(word_to_index), output_dim=50, input_length=X_train.shape[1]))
model.add(LSTM(units=100, return_sequences=True, dropout=0.1, recurrent_dropout=0.1))
model.add(TimeDistributed(Dense(len(tag_to_index), activation='softmax')))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, batch_size=32, epochs=5, validation_split=0.1, verbose=1)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")
# Predict POS tags for a new sentence
def predict_pos(sentence):
    tokens = word_tokenize(sentence.lower())
    indices = [word_to_index.get(token, 1) for token in tokens]
    indices = pad_sequences([indices], maxlen=X_train.shape[1], padding='post')
    predictions = model.predict(indices)
    predicted_tags = [index_to_tag[np.argmax(tag)] for tag in predictions[0]]
    return list(zip(tokens, predicted_tags))

# Example usage
sentence = "Barack Obama was born in Hawaii."
print(predict_pos(sentence))
10. Develop a movie review system (sentiment analysis on movie data)

!pip install numpy pandas scikit-learn nltk

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, accuracy_score
import nltk
nltk.download('movie_reviews')
nltk.download('punkt')

from nltk.corpus import movie_reviews


from sklearn.datasets import load_files

# Load the movie reviews dataset from NLTK


def load_movie_reviews():
    reviews = []
    labels = []

    for fileid in movie_reviews.fileids():
        category = fileid.split('/')[0]  # 'pos' or 'neg'
        with movie_reviews.open(fileid) as f:
            review = f.read()
        reviews.append(review)
        labels.append(category)

    return pd.DataFrame({'review': reviews, 'sentiment': labels})

data = load_movie_reviews()
# Map sentiments to binary labels
data['sentiment'] = data['sentiment'].map({'pos': 1, 'neg': 0})

# Split the data into training and testing sets
X = data['review']
y = data['sentiment']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build a pipeline with CountVectorizer and MultinomialNB
pipeline = Pipeline([
    ('vectorizer', CountVectorizer()),  # Converts text to feature vectors
    ('classifier', MultinomialNB())     # Naive Bayes classifier
])

# Train the model
pipeline.fit(X_train, y_train)

# Make predictions on the test set
y_pred = pipeline.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=['Negative', 'Positive']))

def predict_sentiment(review):
    prediction = pipeline.predict([review])
    sentiment = 'Positive' if prediction[0] == 1 else 'Negative'
    return sentiment

# Example usage
new_review = "The movie was fantastic! I really enjoyed the performances."
print(f"Review: {new_review}")
print(f"Predicted Sentiment: {predict_sentiment(new_review)}")
