NLP Lab Programs
NLP Lab Programs
1. Tokenize a text
from nltk.tokenize import word_tokenize, sent_tokenize
import nltk
# Sample input: two short sentences.
text = "NLP makes machines understand language. Tokenization is the first step."
# Run the sentence tokenizer first, then the word tokenizer, printing each
# result after its label.
for label, tokenizer in (("Sentences:", sent_tokenize), ("Words:", word_tokenize)):
    print(label, tokenizer(text))
output:
# Sentence Tokenization: split `text` into sentences with NLTK's sent_tokenize
sentences = sent_tokenize(text)
import nltk
from nltk.corpus import stopwords

# Fetch the NLTK resources this snippet asks for: the 'punkt' tokenizer data
# and the 'stopwords' corpus.
nltk.download('punkt')
nltk.download('stopwords')
# English stop words, collected into a set.
stop_words = set(stopwords.words('english'))
# NOTE(review): `text` and `word_tokenize` are not defined in this snippet;
# they come from the tokenization program earlier in the file -- confirm
# which sample text was intended here.
words = word_tokenize(text)
output:
# Example text: rebinds `text` to a new sample containing punctuation and an
# apostrophe.
text = "Python is great! It's simple and powerful."
output:
5. Perform stemming
# import these modules
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
# One stemmer instance, reused for every word.
ps = PorterStemmer()
# NOTE(review): `words` is not defined in this snippet; presumably it is the
# token list built earlier in the file -- confirm the intended word list.
for w in words:
    # Print each word next to the stem the Porter stemmer produces for it.
    print(w, " : ", ps.stem(w))
output: