NLP Practical
Practical 1
Write a program to perform word and sentence tokenization on English and Hindi text.
INPUT
import nltk
nltk.download('punkt_tab')
from nltk.tokenize import sent_tokenize, word_tokenize
dataset = "Hello Mr. Watson, how are you doing today? The weather is awsome. The garden is green. We should go out for a walk."
print(sent_tokenize(dataset))
for i in sent_tokenize(dataset):
    print(i)
OUTPUT
['Hello Mr. Watson, how are you doing today?', 'The weather is awsome.', 'The garden is green.', 'We should go out for a walk.']
Hello Mr. Watson, how are you doing today?
The weather is awsome.
The garden is green.
We should go out for a walk.
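The word-level call that produced the next output is not shown in the listing; a minimal sketch, reusing the same dataset:
INPUT
print(word_tokenize(dataset))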
OUTPUT
['Hello', 'Mr.', 'Watson', ',', 'how', 'are', 'you', 'doing', 'today', '?', 'The', 'weather', 'is', 'awsome', '.',
'The', 'garden', 'is', 'green', '.', 'We', 'should', 'go', 'out', 'for', 'a', 'walk', '.']
from nltk.tokenize import TreebankWordTokenizer
# tokenizers work by separating the words using punctuation and spaces
tokenizer = TreebankWordTokenizer()
print("TreebankWordTokenizer", tokenizer.tokenize(dataset))
OUTPUT
TreebankWordTokenizer ['Hello', 'Mr.', 'Watson', ',', 'how', 'are', 'you', 'doing', 'today', '?', 'The',
'weather', 'is', 'awsome.', 'The', 'garden', 'is', 'green.', 'We', 'should', 'go', 'out', 'for', 'a', 'walk', '.']
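The listing now switches to the Hindi sample, but the string itself is missing; the text below is reconstructed from the printed tokens (an assumption, left truncated at 'मा' exactly as in the source output):
INPUT
# Hindi sample text, reconstructed from the outputs below (assumed)
text = "इस लेख में हम आपको इंटरनेट के बारे में सम्पूर्ण जानकारी देने का प्रयास करेंगे। आज के आधुनिक युग में अधिकतर काम इंटरनेट के मा"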
print("word_tokenize",word_tokenize(text))
OUTPUT
word_tokenize ['इस', 'लेख', 'में', 'हम', 'आपको', 'इंटरनेट', 'के', 'बारे',
'में', 'सम्पू', 'र्ण', 'जानकारी', 'देने', 'का', 'प्रयास',
'करेंगे।', 'आज', 'के', 'आधुनिक', 'युग', 'में', 'अधिकतर', 'काम',
'इंटरनेट', 'के', 'मा']
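The sentence-level call for the Hindi text is also missing; a sketch consistent with the label printed in the next output:
INPUT
print("Sentence_tokenize", sent_tokenize(text))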
OUTPUT
Sentence_tokenize ['इस लेख में हम आपको इंटरनेट के बारे में सम्पू
र्ण जानकारी देने का प्रयास करेंगे। आज के आधुनिक युग में
अधिकतर काम इंटरनेट के मा']
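The WordPunctTokenizer call is not shown; a minimal sketch (the output shows how it splits Devanagari matras from their base letters, since they are not treated as word characters):
INPUT
from nltk.tokenize import WordPunctTokenizer
wpt = WordPunctTokenizer()
print("WordPunctTokenizer", wpt.tokenize(text))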
OUTPUT
WordPunctTokenizer ['इस', 'ल', 'े', 'ख', 'म', 'ें', 'हम', 'आपक', 'ो', 'इ', 'ं', 'टरन', 'े', 'ट', 'क',
'े', 'ब', 'ा', 'र', 'े', 'म', 'ें', 'सम', '्', 'प', 'ू', 'र', '्', 'ण', 'ज', 'ा', 'नक', 'ा', 'र', 'ी', 'द', 'े',
'न', 'े', 'क', 'ा', 'प', '्', 'रय', 'ा', 'स', 'कर', 'ें', 'ग', 'े।', 'आज', 'क', 'े', 'आध', 'ु', 'न', 'ि', 'क',
'य', 'ु', 'ग', 'म', 'ें', 'अध', 'ि', 'कतर', 'क', 'ा', 'म', 'इ', 'ं', 'टरन', 'े', 'ट', 'क', 'े', 'म', 'ा']
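The Treebank tokenization of the Hindi text is likewise missing; a sketch reusing the tokenizer created for the English text:
INPUT
print("TreebankWordTokenizer", tokenizer.tokenize(text))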
OUTPUT
TreebankWordTokenizer ['इस', 'लेख', 'में', 'हम', 'आपको', 'इंटरनेट', 'के', 'बारे',
'में', 'सम्पू', 'र्ण', 'जानकारी', 'देने', 'का', 'प्रयास', 'करेंगे।', 'आज',
'के', 'आधुनिक', 'युग', 'में', 'अधिकतर', 'काम', 'इंटरनेट', 'के', 'मा']
Practical 2
Write a program to remove stopwords from English text.
INPUT
import nltk
nltk.download('all')
nltk.download('stopwords')  # only needed if 'all' was not downloaded
nltk.download('punkt')
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

stop_words = set(stopwords.words('english'))
print(stop_words)
print("Total count of Stopwords :", len(stop_words))
OUTPUT
{'same', "they've", "he'd", 'hers', 'y', 'over', "should've", "mightn't", 'because', 'too', "that'll", 'an',
'am', 'while', 'can', "i'd", 'if', "she'll", 'any', "didn't", 'off', 'are', "mustn't", 'was', 'with', 'under', 'were',
'above', 'needn', 'should', 'him', 'they', 'through', 'he', 'again', 'its', 'more', 'herself', "won't", 'don',
"you'll", 'up', "you'd", 'll', 'ain', 'them', 'some', 'at', 'shan', 'will', "wouldn't", 'couldn', 'hadn', 'now',
'until', 'yours', 'ma', 'her', 'd', 'most', 'himself', 'doing', 'being', "i'm", 'yourself', 'for', "i'll", "hadn't",
'both', 'm', 'this', "you're", "needn't", 'there', "shouldn't", 'mightn', 'during', 'how', 'the', 'those', 'on',
"hasn't", 's', 'ourselves', 'you', "he's", 'is', 'as', 'than', 'theirs', 'we', 'o', 'or', 'i', 'she', 'his', 'own', 'a', 'do',
"it'd", 'shouldn', 'weren', "they'll", 'whom', 'had', "we'll", 'each', 'such', 'wasn', 'nor', 'doesn', 'these',
'between', "she'd", 'not', "he'll", "we'd", "don't", "we're", 'in', "she's", 'few', 'into', "doesn't", 'it',
"couldn't", 'but', 'when', "you've", 'your', 'yourselves', 'then', 'from', 'only', 'to', "i've", 'here', 'my',
'hasn', 'against', "they're", 'have', 'myself', 'before', 'mustn', 'aren', 'down', 'wouldn', 'ours', "it's",
"isn't", "it'll", 'why', 'other', 'won', 'isn', 'me', 'so', 'further', 'has', 've', "we've", 'by', 'didn', "shan't",
'did', 'been', 'no', "haven't", 'themselves', 'of', 'once', 'their', 'our', 'where', 'what', 're', "wasn't",
'having', 'after', 'which', 'be', "aren't", "they'd", "weren't", 'out', 'all', 'itself', 'about', 'that', 'and', 't',
'very', 'haven', 'below', 'who', 'just', 'does'}
Total count of Stopwords : 198
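The dataset tokenized in the next step is not reproduced in the listing; it is assumed to be a short paragraph assigned beforehand, along these lines:
INPUT
# placeholder; the full paragraph is not reproduced in the source
dataset = "They covered the precious mahogany coffin with a brown amalgam of rocks, decomposed organisms, ..."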
words = word_tokenize(dataset)
print(words)
print()
print("Total words :",len(words))
OUTPUT
['They', 'covered', 'the', 'precious', 'mahogany', 'coffin', 'with', 'a', 'brown', 'amalgam', 'of', 'rocks',
',', 'decomposed', 'organisms', ',', 'and', 'w', 'It', 'was', 'my', 'turn', 'to', 'take', 'the', 'shovel', ',', 'but', 'I',
'felt', 'too', 'ashamed', 'to', 'dutifully', 'send', 'her', 'off', 'when', 'I', 'had', 'not', 'p', 'I', 'refused', 'to',
'throw', 'dirt', 'on', 'her', '.', 'I', 'refused', 'to', 'let', 'go', 'of', 'my', 'grandmother', ',', 'to', 'accept', 'a',
'death', 'I', 'had', 'not', 's', 'to', 'believe', 'that', 'an', 'illness', 'could', 'not', 'only', 'interrupt', ',', 'but',
'steal', 'a', 'beloved', 'life', '.']
Total words : 83
filtered_sentence = []
for w in words:
    if w not in stop_words:
        filtered_sentence.append(w)

print(filtered_sentence)
print()
print("After removing stopwords", len(filtered_sentence))
OUTPUT
['They', 'covered', 'precious', 'mahogany', 'coffin', 'brown', 'amalgam', 'rocks', ',', 'decomposed',
'organisms', ',', 'w', 'It', 'turn', 'take', 'shovel', ',', 'I', 'felt', 'ashamed', 'dutifully', 'send', 'I', 'p', 'I',
'refused', 'throw', 'dirt', '.', 'I', 'refused', 'let', 'go', 'grandmother', ',', 'accept', 'death', 'I', 'believe',
'illness', 'could', 'interrupt', ',', 'steal', 'beloved', 'life', '.']
After removing stopwords 48
Practical 3
Write a program to perform stemming and lemmatization on English text.
INPUT
import nltk
nltk.download("all")
OUTPUT
program : program
programs : program
programmer : programm
programming : program
programmers : programm
# importing modules
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize

ps = PorterStemmer()
sentence = "Programmers program with programming languages"
words = word_tokenize(sentence)
for w in words:
    print(w, " : ", ps.stem(w))
OUTPUT
Programmers : programm
program : program
with : with
programming : program
languages : languag
from nltk.stem import PorterStemmer

e_words = ["wait", "waiting", "waited", "waits"]
ps = PorterStemmer()
for w in e_words:
    rootWord = ps.stem(w)
    print(rootWord)
OUTPUT
wait
wait
wait
wait
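The input for the next output is missing from the listing; the sentence below is an assumption reconstructed from the stems and may differ slightly from the original:
INPUT
from nltk.tokenize import word_tokenize
# assumed sentence; the original is not reproduced in the source
sentence = "Hello Kajal, you have to build a very good site and I love visiting your site"
for w in word_tokenize(sentence):
    print(ps.stem(w))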
OUTPUT
hello
kajal
,
you
have
to
build
a
veri
good
site
and
i
love
visit
your
sit
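The Porter vs. Lancaster comparison code is not shown; a sketch consistent with the printed stems (the word list is assumed):
INPUT
from nltk.stem import PorterStemmer, LancasterStemmer
words = ["cats", "trouble", "troubling", "troubled"]  # assumed word list
porter = PorterStemmer()
lancaster = LancasterStemmer()
print("Porter Stemmer")
for w in words:
    print(porter.stem(w))
print("Lancaster Stemmer", " ".join(lancaster.stem(w) for w in words))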
OUTPUT
Porter Stemmer
cat
troubl
troubl
troubl
Lancaster Stemmer cat troubl troubl troubl
OUTPUT
‘pythoners are very intelligent and work very pythonly and now they are pythoning their way to
success.’
# Stemming
import nltk
from nltk.stem.porter import PorterStemmer

porter_stemmer = PorterStemmer()
text = "studies studying cries cry"
tokenization = nltk.word_tokenize(text)
for w in tokenization:
    print("Stemming for {} is {}".format(w, porter_stemmer.stem(w)))
OUTPUT
Stemming for studies is studi
Stemming for studying is studi
Stemming for cries is cri
Stemming for cry is cri
# Lemmatization
import nltk
from nltk.stem import WordNetLemmatizer

wordnet_lemmatizer = WordNetLemmatizer()
text = "studies studying cries cry"
tokenization = nltk.word_tokenize(text)
for w in tokenization:
    print("Lemma for {} is {}".format(w, wordnet_lemmatizer.lemmatize(w)))
OUTPUT
Lemma for studies is study
Lemma for studying is studying
Lemma for cries is cry
Lemma for cry is cry
Practical 4
Write a program to perform Part-of-Speech (POS) tagging on English text.
INPUT
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('tagsets')
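The tokenization step that produced the next output is not in the listing; a sketch, with the sentence reconstructed from the printed tokens:
from nltk.tokenize import word_tokenize
text = "Taj Mahal is one of the world’s most celebrated structures in the world. It is a stunning symbol of Indian rich history"
tokens = word_tokenize(text)
tokens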
OUTPUT
['Taj',
'Mahal',
'is',
'one',
'of',
'the',
'world',
'’',
's',
'most',
'celebrated',
'structures',
'in',
'the',
'world',
'.',
'It',
'is',
'a',
'stunning',
'symbol',
'of',
'Indian',
'rich',
'history']
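The POS-tagging call itself, which is the point of this practical, is not shown either; a minimal sketch:
# tag each token with its Penn Treebank part-of-speech tag
tagged = nltk.pos_tag(tokens)
print(tagged)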
# Tag Set
nltk.help.upenn_tagset()
OUTPUT
Practical 5
Write a program to perform Named Entity Recognition (NER) and chunking on English text.
INPUT
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')
nltk.download('punkt_tab')
nltk.download('averaged_perceptron_tagger_eng')
nltk.download('maxent_ne_chunker_tab')
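The sentence, tokenization, and tagging code for the NER example are not shown; a sketch reconstructed from the output below:
from nltk.tokenize import word_tokenize
sentence = "Abraham Lincoln was an American statesman and lawyer who served as the 16th President of the United States"
tagged = nltk.pos_tag(word_tokenize(sentence))
tagged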
OUTPUT
[('Abraham', 'NNP'),
('Lincoln', 'NNP'),
('was', 'VBD'),
('an', 'DT'),
('American', 'JJ'),
('statesman', 'NN'),
('and', 'CC'),
('lawyer', 'NN'),
('who', 'WP'),
('served', 'VBD'),
('as', 'IN'),
('the', 'DT'),
('16th', 'CD'),
('President', 'NNP'),
('of', 'IN'),
('the', 'DT'),
('United', 'NNP'),
('States', 'NNPS')]
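The named-entity chunking step that produced the tree below would then be:
# group the tagged tokens into named-entity chunks (PERSON, GPE, ...)
ne_tree = nltk.ne_chunk(tagged)
print(ne_tree)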
OUTPUT
(S
(PERSON Abraham/NNP)
(PERSON Lincoln/NNP)
was/VBD
an/DT
(GPE American/JJ)
statesman/NN
and/CC
lawyer/NN
who/WP
served/VBD
as/IN
the/DT
16th/CD
President/NNP
of/IN
the/DT
(GPE United/NNP States/NNPS))
#Chunking
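The first half of the chunking code (tokenizing, tagging, and the chunk grammar) is missing; the grammar below is an assumption chosen to be consistent with the printed chunks (runs of proper nouns and single common nouns):
from nltk import pos_tag, RegexpParser
from nltk.tokenize import word_tokenize

text = "Taj Mahal is one of the world’s most celebrated structures in the world. It is a stunning symbol of Indian rich history"
tokens = word_tokenize(text)
print(tokens)
postagging = pos_tag(tokens)
print(postagging)

# assumed grammar: chunk sequences of proper nouns (NNP) and single common nouns (NN)
sequence_chunk = """
chunk: {<NNP>+}
       {<NN>}
"""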
OUTPUT
['Taj', 'Mahal', 'is', 'one', 'of', 'the', 'world', '’', 's', 'most', 'celebrated', 'structures', 'in',
'the', 'world', '.', 'It', 'is', 'a', 'stunning', 'symbol', 'of', 'Indian', 'rich', 'history']
[('Taj', 'NNP'), ('Mahal', 'NNP'), ('is', 'VBZ'), ('one', 'CD'), ('of', 'IN'), ('the', 'DT'),
('world', 'NN'), ('’', 'NNP'), ('s', 'VBZ'), ('most', 'JJS'), ('celebrated', 'JJ'), ('structures',
'NNS'), ('in', 'IN'), ('the', 'DT'), ('world', 'NN'), ('.', '.'), ('It', 'PRP'), ('is', 'VBZ'), ('a',
'DT'), ('stunning', 'JJ'), ('symbol', 'NN'), ('of', 'IN'), ('Indian', 'JJ'), ('rich', 'JJ'), ('history',
'NN')]
chunk = RegexpParser(sequence_chunk)
chunk_result = chunk.parse(postagging)
print(chunk_result)
OUTPUT
(S (chunk Taj/NNP Mahal/NNP) is/VBZ one/CD of/IN the/DT (chunk world/NN) (chunk ’/NNP)
s/VBZ most/JJS celebrated/JJ structures/NNS in/IN the/DT (chunk world/NN) ./. It/PRP is/VBZ
a/DT stunning/JJ (chunk symbol/NN) of/IN Indian/JJ rich/JJ (chunk history/NN))