Programs code

This document contains multiple Python programs that use the NLTK library for natural language processing tasks such as text preprocessing, tokenization, part-of-speech tagging, named entity recognition, and typo correction. Key functionalities include counting the total and unique words in Moby Dick, generating TF-IDF matrices, and enhancing customer messages by correcting typos. The programs also demonstrate stemming and lemmatization on titles extracted from a JSONL file.


Program 1

# Import necessary libraries
import nltk
from nltk.corpus import gutenberg, stopwords
import string

# Download necessary NLTK resources
nltk.download('gutenberg')
nltk.download('stopwords')
nltk.download('punkt')

# Load the text of Moby Dick from the Gutenberg corpus
raw_text = gutenberg.raw('melville-moby_dick.txt')

# Define a preprocessing function to clean and tokenize the text
def preprocess_text(text):
    # Convert text to lowercase
    text = text.lower()

    # Remove punctuation
    text = text.translate(str.maketrans('', '', string.punctuation))

    # Tokenize the text into words
    words = nltk.word_tokenize(text)

    # Remove stop words
    stop_words = set(stopwords.words('english'))
    filtered_words = [word for word in words if word not in stop_words]

    return filtered_words

# Preprocess the text
processed_text = preprocess_text(raw_text)

# Count the number of documents (we have only one document here: Moby Dick)
document_count = 1

# Count the total number of words and unique words
word_count = len(processed_text)
unique_words = set(processed_text)
unique_word_count = len(unique_words)

# Display the results
print("Document Count:", document_count)
print("Total Words in Corpus:", word_count)
print("Unique Words in Corpus:", unique_word_count)

Program 3

import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from nltk.chunk import ne_chunk

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')

corpus = ["The quick brown fox jumps over the lazy dog."]

# Tokenize each sentence, then flatten into a single lowercase word list
tokenized_corpus = [word_tokenize(sentence) for sentence in corpus]
flat_corpus = [word.lower() for sentence in tokenized_corpus for word in sentence]

# Compute the TF-IDF matrix over the documents in the corpus
# (fitting on whole documents, not on individual words)
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(corpus)

# Part-of-speech tagging and named entity recognition on the word list
pos_tags = pos_tag(flat_corpus)
named_entities = ne_chunk(pos_tags)

# Chunk the tagged words into noun, verb and prepositional phrases
grammar = r"""
NP: {<DT|JJ|NN.*>+}
VP: {<VB.*>+}
PP: {<IN> <NP>}
"""
chunk_parser = nltk.RegexpParser(grammar)
chunked_sentences = chunk_parser.parse(pos_tags)

print("TF-IDF Matrix:")
print(tfidf_matrix.toarray())
print("\nPart-of-Speech Tags:")
print(pos_tags)
print("\nNamed Entities:")
print(named_entities)
print("\nChunked Sentences:")
print(chunked_sentences)
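
To make the raw TF-IDF array easier to read, each column can be mapped back to its vocabulary term. A minimal follow-up sketch, assuming scikit-learn 1.0 or newer (where get_feature_names_out is available):

# Map each TF-IDF column index back to its vocabulary term
terms = vectorizer.get_feature_names_out()
for term, weight in zip(terms, tfidf_matrix.toarray()[0]):
    print(f"{term}: {weight:.3f}")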

Program 4:

!pip install textblob

from textblob import TextBlob

def correct_typos(text):
    # Create a TextBlob object for spell-checking
    blob = TextBlob(text)

    # Correct typos using TextBlob
    return str(blob.correct())

def enhance_message(message):
    # Correct typos and grammatical errors
    corrected_message = correct_typos(message)

    return corrected_message

# Example usage
if __name__ == "__main__":
    # Example customer message with typos and grammatical errors
    customer_message = "u r looking awsome"

    # Enhance the customer message
    enhanced_message = enhance_message(customer_message)

    # Print the corrected message
    print("Original Message: ", customer_message)
    print("Enhanced Message: ", enhanced_message)

Program 5:

import nltk
from nltk.stem import PorterStemmer, WordNetLemmatizer

nltk.download('punkt')
nltk.download('wordnet')
nltk.download('omw-1.4')

import json
import re

# %%
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()

# %%
def stem_text(text):
    words = nltk.word_tokenize(text)
    return ' '.join(stemmer.stem(word) for word in words)

def lemmatize_text(text):
    words = nltk.word_tokenize(text)
    return ' '.join(lemmatizer.lemmatize(word) for word in words)

# %%
file_path = "dataset.jsonl"

# %%
titles = []
with open(file_path, 'r') as file:
    for line in file:
        data = json.loads(line)
        title = data.get('metadata', {}).get('title')
        # Skip records that have no title
        if title:
            titles.append(title)

# %%
titles = [re.sub(r'\n', '', t) for t in titles]
print(titles[:5])

# %%
titles = [t.lower() for t in titles]
print(titles[:5])

# %%
stem_titles = [stem_text(t) for t in titles]
lemm_titles = [lemmatize_text(t) for t in titles]
print(stem_titles[:5])
print(lemm_titles[:5])

# %%
keyword = "fluid"
keyword = keyword.lower()

stem_matches = []
lemm_matches = []

# Search in stemmed titles
for stem_title in stem_titles:
    if keyword in stem_title.lower():
        stem_matches.append(stem_title)

# Search in lemmatized titles
for lemm_title in lemm_titles:
    if keyword in lemm_title.lower():
        lemm_matches.append(lemm_title)

print("Matches in stemmed titles:", len(stem_matches))
print("Matches in lemmatized titles:", len(lemm_matches))

# %%
for i in stem_matches:
    print(i)

# %%
for j in lemm_matches:
    print(j)
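
A quick way to see why the stemmed and lemmatized searches can return different counts is to compare the two normalizers directly on a few sample words, reusing the stemmer and lemmatizer objects defined above; the word list here is purely illustrative:

# %%
# Compare stemming and lemmatization on a few sample words
for word in ["studies", "running", "fluids"]:
    print(word, "->", stemmer.stem(word), "|", lemmatizer.lemmatize(word))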

Program 6:

import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.tag import pos_tag
from nltk.chunk import ne_chunk

nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')

def extract_named_entities(text):
    sentences = sent_tokenize(text)
    named_entities = []
    for sentence in sentences:
        tokens = word_tokenize(sentence)
        pos_tags = pos_tag(tokens)
        chunks = ne_chunk(pos_tags)
        for chunk in chunks:
            # Subtrees with a label are named-entity chunks (PERSON, GPE, ORGANIZATION, ...)
            if hasattr(chunk, 'label'):
                entity_type = chunk.label()
                entity_name = ' '.join([c[0] for c in chunk])
                named_entities.append((entity_name, entity_type))
    return named_entities

text = """President Joe Biden is scheduled to meet with Ukrainian
President Volodymyr Zelenskyy at the White House on Tuesday.
The meeting comes amid ongoing discussions about military aid and
support for Ukraine's defense against Russian aggression."""

named_entities = extract_named_entities(text)
for entity_name, entity_type in named_entities:
    print(f"Entity: {entity_name}, Type: {entity_type}")
