Name:- Vipin Rawat Course:- MCA Section:- 4E

Roll No:-

Q1:-Write a Python program to tokenize a statement.

import nltk
from nltk.tokenize import word_tokenize

nltk.download('punkt')
sentence = "Tokenize this sentence."
tokens = word_tokenize(sentence)
print(tokens)

Q2:-Remove the stopwords.

import nltk
from nltk.corpus import stopwords

nltk.download('stopwords')
stop_words = set(stopwords.words('english'))
filtered_sentence = [word for word in tokens if word.lower() not in stop_words]
print(filtered_sentence)

import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
tokens = ["the", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog"]
stop_words = set(stopwords.words('english'))
newfiltered_sentence = [word for word in tokens if not word.lower() in stop_words]
print(newfiltered_sentence)

Q3:-To carry out Stemming or Lemmatization.



from nltk.stem import PorterStemmer

# stem the tokens produced in Q1/Q2
ps = PorterStemmer()
stemmed_words = [ps.stem(word) for word in tokens]
print(stemmed_words)
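
The question also mentions lemmatization; a minimal sketch using NLTK's WordNetLemmatizer (assuming the 'wordnet' corpus has been downloaded) could look like this:

import nltk
from nltk.stem import WordNetLemmatizer

nltk.download('wordnet')
lemmatizer = WordNetLemmatizer()
# lemmatize the same tokens produced in Q1 (the default part of speech is noun)
lemmatized_words = [lemmatizer.lemmatize(word) for word in tokens]
print(lemmatized_words)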

Q4:-To carry out parts-of-speech tagging.

nltk.download('averaged_perceptron_tagger')
tagged_words = nltk.pos_tag(tokens)
print(tagged_words)

import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
sentence = "This is a sample sentence"
tokens = nltk.word_tokenize(sentence)
tagged_words = nltk.pos_tag(tokens)
print(tagged_words)

Q5:-To carry out chunking of words based on parts-of-speech tagging.

nltk.download('maxent_ne_chunker')
nltk.download('words')
# named-entity chunking over the POS-tagged words from Q4
ne_chunks = nltk.ne_chunk(tagged_words)
print(ne_chunks)
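
Since ne_chunk performs named-entity chunking, a POS-pattern chunker is another option; a minimal sketch with nltk.RegexpParser and an illustrative noun-phrase grammar (the pattern is an assumption, not part of the original answer):

import nltk

# a simple noun-phrase rule: optional determiner, any adjectives, then one or more nouns
grammar = "NP: {<DT>?<JJ>*<NN.*>+}"
chunk_parser = nltk.RegexpParser(grammar)
chunk_tree = chunk_parser.parse(tagged_words)  # tagged_words from Q4
print(chunk_tree)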

Q6:-Regular expression tagger.

import nltk
from nltk.tokenize import word_tokenize
from nltk import pos_tag, RegexpParser
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

def tokenize_and_chunk_input():
    user_input = input("Enter a sentence: ")
    tokens = word_tokenize(user_input)
    tagged_tokens = pos_tag(tokens)
    grammar = r"""
        NP: {<DT|JJ|NN.*>+}
        PP: {<IN><NP>}
        VP: {<VB.*><NP|PP|CLAUSE>+$}
        CLAUSE: {<NP><VP>}
    """
    chunk_parser = RegexpParser(grammar)
    chunks = chunk_parser.parse(tagged_tokens)
    return chunks

if __name__ == "__main__":
    print("Tokenizing and chunking input from the user using NLTK...")
    chunked_input = tokenize_and_chunk_input()
    print("Chunked input:", chunked_input)


Q7:-Write a program to take input from the user and carry out the basic operations of NLP.

import nltk
from nltk.tokenize import word_tokenize
from nltk import pos_tag, RegexpParser
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
def tokenize_and_chunk_input():
    user_input = input("Enter a sentence: ")
    tokens = word_tokenize(user_input)
    tagged_tokens = pos_tag(tokens)
    grammar = r"""
        NP: {<DT|JJ|NN.*>+}
        PP: {<IN><NP>}
        VP: {<VB.*><NP|PP|CLAUSE>+$}
        CLAUSE: {<NP><VP>}
    """
    chunk_parser = RegexpParser(grammar)
    chunks = chunk_parser.parse(tagged_tokens)
    return chunks

if __name__ == "__main__":
    print("Tokenizing and chunking input from the user using NLTK...")
    chunked_input = tokenize_and_chunk_input()
    print("Chunked input:", chunked_input)

Q8:- To calculate TF-IDF (term frequency-inverse document frequency) for a given set of sentences.

from sklearn.feature_extraction.text import TfidfVectorizer


documents = [
"This is the first document.",
"This document is the second document.",
"And this is the third one.",
"Is this the first document?",
]
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(documents)
print(tfidf_matrix.toarray())

Q9:-Write a program to carry out count vectorisation and transform each document into a vector of word counts.

from sklearn.feature_extraction.text import CountVectorizer


documents = [
"This is the first document.",
"This document is the second document.",
"And this is the third one.",
"Is this the first document?",
]
vectorizer = CountVectorizer()
bow_matrix = vectorizer.fit_transform(documents)
print(bow_matrix.toarray())
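
To see which word each column of the matrix counts, the learned vocabulary can also be printed; a small follow-up sketch:

# column-to-word mapping for the bag-of-words matrix above
print(vectorizer.get_feature_names_out())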

Q10:-Write a Python program to find similar words using a Word2Vec model.

from gensim.models import Word2Vec


sentences = [
["this", "is", "the", "first", "sentence", "for", "word2vec"],
["this", "is", "the", "second", "sentence"],
["yet", "another", "sentence"],
["one", "more", "sentence"],
["and", "the", "final", "sentence"],
]
model = Word2Vec(sentences, min_count=1)
similar_words = model.wv.most_similar("sentence")
print(similar_words)
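
As a quick follow-up, the similarity between two specific words from the training vocabulary can be checked directly; a minimal sketch (the word pair is arbitrary):

# cosine similarity between two words seen during training
print(model.wv.similarity("first", "second"))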

Q11:-Write a Python program to find the words common to two given paragraphs.

import nltk
nltk.download('punkt')
paragraph1 = "Hello and welcome! How can I assist you today?"
paragraph2 = "Greetings! I'm here to help. What can I do for you?"
words_paragraph1 = set(nltk.word_tokenize(paragraph1.lower()))
words_paragraph2 = set(nltk.word_tokenize(paragraph2.lower()))

similar_words = words_paragraph1.intersection(words_paragraph2)
print("Similar words between the two paragraphs:")
print(similar_words)
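
A single overlap score can also be reported; a minimal sketch of Jaccard similarity over the two word sets defined above:

# Jaccard similarity: shared words divided by all distinct words across both paragraphs
union_words = words_paragraph1.union(words_paragraph2)
print("Jaccard similarity:", round(len(similar_words) / len(union_words), 3))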

Q12:-Write a Python program to tokenize the given text sentence by sentence.

import nltk
nltk.download('punkt')

def tokenize_sentences(text):
    sentences = nltk.sent_tokenize(text)
    return sentences

text = ("Tokenization is the process of breaking text into sentences. "
        "It's an important step in natural language processing.")
sentences = tokenize_sentences(text)
for sentence in sentences:
    print(sentence)

Q13:-Write a Python program to take a sample of at least five lines and tokenize it either by word or by sentence.

import nltk
nltk.download('punkt')

def tokenize_sentences(text):
    sentences = nltk.sent_tokenize(text)
    return sentences

text = ("Tokenization is the process of breaking text into sentences. "
        "It's an important step in natural language processing. Gjh. yhfh. Jhyfhu ")
sentences = tokenize_sentences(text)
for sentence in sentences:
    print(sentence)

nltk.sent_tokenize(text)
nltk.word_tokenize(text)

def tokenize_sentences(text):
    sentences = nltk.sent_tokenize(text)
    return sentences

def tokenize_words(sentence):
    words = nltk.word_tokenize(sentence)
    return words

for sentence in sentences:
    print(sentence)

Q14:-Write a Python program to download sample text from the Gutenberg corpus and extract the nouns as well as the verbs from it. The sample text is taken from shakespeare-hamlet.txt.

import nltk
from nltk.corpus import gutenberg
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
nltk.download('gutenberg')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
sample_text = gutenberg.raw('shakespeare-hamlet.txt')[:1000]
tokens = word_tokenize(sample_text)
tagged_tokens = pos_tag(tokens)
nouns = [word for word, pos in tagged_tokens if pos.startswith('NN')]
verbs = [word for word, pos in tagged_tokens if pos.startswith('VB')]
print("Nouns:")
print(nouns)
print("\nVerbs:")
print(verbs)

Q15:-Write a Python program to remove stopwords from a given text using the built-in stopwords list from NLTK.

import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')

def remove_stopwords(text):
    stop_words = set(stopwords.words('english'))
    words = text.split()
    filtered_text = [word for word in words if word.lower() not in stop_words]
    return ' '.join(filtered_text)

text = "This is a sample sentence with some stop words that need to be removed."
filtered_text = remove_stopwords(text)
print("Filtered text:", filtered_text)



Q16:-Write a Python program to remove stopwords using a customized list of stopwords.

def remove_stopwords_custom(text, custom_stopwords):
    words = text.split()
    filtered_text = [word for word in words if word.lower() not in custom_stopwords]
    return ' '.join(filtered_text)

custom_stopwords = {'is', 'a', 'with', 'to', 'be'}
filtered_text_custom = remove_stopwords_custom(text, custom_stopwords)
print("Filtered text (custom):", filtered_text_custom)

Q17:-Write a Python program to remove stopwords using the stopword lists provided by other libraries.


import spacy
nlp = spacy.load("en_core_web_sm")
text = "This is a sample sentence with some stop words that need to be removed."

def remove_stopwords_spacy(text):
    doc = nlp(text)
    filtered_text = [token.text for token in doc if not token.is_stop]
    return ' '.join(filtered_text)

filtered_text_spacy = remove_stopwords_spacy(text)
print("Filtered text (spaCy):", filtered_text_spacy)

from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS

def remove_stopwords_sklearn(text):
    stop_words = set(ENGLISH_STOP_WORDS)
    words = text.split()
    filtered_text = [word for word in words if word.lower() not in stop_words]
    return ' '.join(filtered_text)

filtered_text_sklearn = remove_stopwords_sklearn(text)
print("Filtered text (scikit-learn):", filtered_text_sklearn)

from gensim.parsing.preprocessing import remove_stopwords as gensim_remove_stopwords


filtered_text_gensim = gensim_remove_stopwords(text)
print("Filtered text (Gensim):", filtered_text_gensim)

from nltk.stem import SnowballStemmer

def nltk_snowball_stemming(text):
    stemmer = SnowballStemmer("english")
    tokens = text.split()
    stemmed_tokens = [stemmer.stem(token) for token in tokens]
    return " ".join(stemmed_tokens)

text = "I am going to the store to buy some apples and oranges"
stemmed_text = nltk_snowball_stemming(text)
print(stemmed_text)

Q18:-Write a Python program to demonstrate stemming using the Porter stemmer.

from nltk.stem import PorterStemmer

def nltk_stemming(text):
    stemmer = PorterStemmer()
    tokens = text.split()
    stemmed_tokens = [stemmer.stem(token) for token in tokens]
    return " ".join(stemmed_tokens)

text = "I am going to the store to buy some apples and oranges"
stemmed_text = nltk_stemming(text)
print(stemmed_text)

Q19:-Write a Python program to demonstrate stemming using the Lancaster stemmer.

from nltk.stem import LancasterStemmer

lancaster_stemmer = LancasterStemmer()
words = ["running", "flies", "swimming", "happier", "cats", "dogs"]
stemmed_words = [lancaster_stemmer.stem(word) for word in words]

for original, stemmed in zip(words, stemmed_words):
    print(f"Original: {original}, Stemmed: {stemmed}")

Q20:-Write a Python program to demonstrate stemming using a regular-expression stemmer.

import re

def regex_stemmer(word):
    patterns = [
        (r's$', ''),
        (r'ed$', ''),
        (r'ing$', '')
    ]
    for pattern, replacement in patterns:
        if re.search(pattern, word):
            return re.sub(pattern, replacement, word)
    return word

words = ["running", "flies", "swimming", "happier", "cats", "dogs"]
stemmed_words = [regex_stemmer(word) for word in words]
for original, stemmed in zip(words, stemmed_words):
    print(f"Original: {original}, Stemmed: {stemmed}")

from sklearn.feature_extraction.text import TfidfVectorizer


documents = [
"This is the first document.",
"This document is the second document.",
"And this is the third one.",
"Is this the first document?",
]
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(documents)
print("Feature Names:", tfidf_vectorizer.get_feature_names_out())
print("TF-IDF Matrix:")
print(tfidf_matrix.toarray())


Q21:-Write a Python program to carry out POS tagging.

import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from nltk.corpus import wordnet
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')
text = "The quick brown fox jumps over the lazy dog."
words = word_tokenize(text)
default_pos_tags = pos_tag(words)
print("Default POS tagging:")
print(default_pos_tags)
print("\nRule-based POS tagging (same as default tagger):")
print(default_pos_tags)
nltk.download('hmm_treebank_pos_tagger')
hmm_tagger = nltk.tag.HiddenMarkovModelTagger.train([default_pos_tags])
hmm_pos_tags = hmm_tagger.tag(words)
print("\nStochastic POS tagging (Hidden Markov Model):")
print(hmm_pos_tags)
print("\nStochastic POS tagging (Hidden Markov Model):")
print(hmm_pos_tags)
def get_wordnet_pos(tag):
    if tag.startswith('J'):
        return wordnet.ADJ
    elif tag.startswith('V'):
        return wordnet.VERB
    elif tag.startswith('N'):
        return wordnet.NOUN
    elif tag.startswith('R'):
        return wordnet.ADV
    else:
        return None

wordnet_tags = pos_tag(words)
wordnet_pos_tags = [(word, get_wordnet_pos(tag)) for word, tag in wordnet_tags]
print("\nDictionary-based POS tagging (WordNet):")
print(wordnet_pos_tags)

Q22:-Write a Python program to develop a chatbot that helps to diagnose simple flu symptoms.

import random

greetings = ["Hello!", "Hi there!", "Welcome!", "Greetings!"]
common_questions = [
    "What is your name?",
    "How can I help you today?",
    "What symptoms are you experiencing?",
    "Do you have any allergies?",
    "Are you currently taking any medications?",
]
responses = [
    "I'm sorry, I'm just a chatbot and cannot provide medical advice. It's best to consult with a healthcare professional.",
    "Please consult with a doctor for proper diagnosis and treatment.",
    "It's important to seek medical attention for your condition.",
    "I recommend reaching out to a healthcare professional to discuss your concerns.",
]

def get_random_greeting():
    return random.choice(greetings)

def respond(user_input):
    if user_input.endswith("?"):
        return random.choice(responses)
    else:
        return random.choice(common_questions)

def chat():
    print(get_random_greeting())
    while True:
        user_input = input("> ")
        if user_input.lower() == "exit":
            break
        print(respond(user_input))

chat()

Q22:-Write a Python program to create an interactive health chatbot with a possible diagnosis and home treatment for the reported symptoms.

class HealthChatbot:
    def __init__(self):
        self.symptoms = []

    def greet_user(self):
        print("Hello! I am your health chatbot. Let's check your symptoms.")

    def ask_symptoms(self):
        print("Please answer the following questions with 'yes' or 'no'.")
        self.symptoms.append(input("Do you have a fever? ").lower())
        self.symptoms.append(input("Do you have a cough? ").lower())
        self.symptoms.append(input("Do you have difficulty breathing? ").lower())

    def diagnose(self):
        fever = self.symptoms[0] == 'yes'
        cough = self.symptoms[1] == 'yes'
        difficulty_breathing = self.symptoms[2] == 'yes'
        if fever and cough and difficulty_breathing:
            print("Based on your symptoms, you may have pneumonia. Please consult a doctor immediately.")
        elif fever and cough:
            print("Based on your symptoms, you may have a common cold or flu. Get plenty of rest and fluids.")
        elif difficulty_breathing:
            print("Based on your symptoms, you may have a respiratory issue. Seek medical attention promptly.")
        else:
            print("Based on your symptoms, you seem to be generally healthy. However, if you feel unwell, consult a doctor.")

    def start(self):
        self.greet_user()
        self.ask_symptoms()
        self.diagnose()

if __name__ == "__main__":
    chatbot = HealthChatbot()
    chatbot.start()

Q23:-Write a Python program using SVM and TF-IDF to analyse the given corpus of documents.

from sklearn.datasets import fetch_20newsgroups


from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
categories = ['alt.atheism', 'comp.graphics', 'sci.med', 'soc.religion.christian']
news = fetch_20newsgroups(categories=categories)
X_train, X_test, y_train, y_test = train_test_split(news.data, news.target, test_size=0.2, random_state=42)
model = make_pipeline(TfidfVectorizer(), SVC())
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Q24:-Write a Python program to calculate a bag of words after carrying out the following preprocessing of the text: convert all the text to lowercase and replace every punctuation character with a space.

from collections import Counter

def preprocess_text(text):
    # the question asks for lowercase, so use lower() rather than upper()
    text = text.lower()
    for punctuation in '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~':
        text = text.replace(punctuation, ' ')
    return text

def create_bag_of_words(text):
    text = preprocess_text(text)
    words = text.split()
    bag_of_words = Counter(words)
    return bag_of_words

text = "HOW how is the boss is a .Simple Example is sometimes better than better."
bow = create_bag_of_words(text)
print("Bag of Words:")
for word, count in bow.items():
print(f"{word}: {count}")
