NLP_Lab_1.ipynb
Tokenization
import nltk
nltk.download('punkt_tab')
from nltk.tokenize import word_tokenize
def tokenize_text(text):
tokens = word_tokenize(text)
return tokens
# Example usage:
text = "This is an example sentence. Tokenization is important in NLP."
tokens = tokenize_text(text)
tokens
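The same punkt models also support sentence splitting. A minimal sketch (an addition, not part of the original lab) using NLTK's sent_tokenize on the example text above:
from nltk.tokenize import sent_tokenize
# Split the example text into sentences before any word-level tokenization
sentences = sent_tokenize(text)
print(sentences)  # Expected: ['This is an example sentence.', 'Tokenization is important in NLP.']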
nltk.download('stopwords')
nltk.download('punkt')
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data] Package punkt is already up-to-date!
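The stopwords corpus downloaded above is not used elsewhere in this notebook. A minimal sketch of the usual follow-up step, filtering the tokens from the first cell against NLTK's English stop-word list (added here for illustration):
from nltk.corpus import stopwords
# Build a set of English stop words for fast membership tests
stop_words = set(stopwords.words('english'))
# Drop tokens that are stop words (case-insensitive); punctuation is left untouched
filtered_tokens = [w for w in tokens if w.lower() not in stop_words]
print(filtered_tokens)  # roughly: ['example', 'sentence', '.', 'Tokenization', 'important', 'NLP', '.']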
text = "The quick brown fox jumps over the lazy dog."
words = word_tokenize(text)
pos_tags = nltk.pos_tag(words)
filtered_text = [word for word, tag in pos_tags if tag.startswith('NN')] # Keep only nouns
print(filtered_text) # Output: ['fox', 'dog']
Stemming
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
import nltk
nltk.download('punkt')
# Initialize the Porter Stemmer
ps = PorterStemmer()
# Example sentence
text = "Running runners run easily and are loving the adventure."
# Tokenize the sentence
words = word_tokenize(text)
# Apply stemming
stemmed_words = [ps.stem(word) for word in words]
print("Stemmed Words:", stemmed_words)
Stemmed Words: ['run', 'runner', 'run', 'easili', 'and', 'are', 'love', 'the', 'adventur', '.']
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data] Package punkt is already up-to-date!
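The Porter stems above ('easili', 'adventur') are truncated, non-dictionary forms. As an optional extension (not in the original lab), the Snowball stemmer, an updated variant of Porter, can be run on the same tokens for comparison:
from nltk.stem import SnowballStemmer
# Snowball ("Porter2") stemmer for English, applied to the same token list as above
snowball = SnowballStemmer("english")
print("Snowball stems:", [snowball.stem(word) for word in words])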
Lemmatization
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
# Download necessary datasets
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('omw-1.4')
# Initialize lemmatizer
lemmatizer = WordNetLemmatizer()
# Sample text
text = "The leaves are falling from the trees and the wolves are howling."
# Tokenize words
words = word_tokenize(text)
# Apply lemmatization
lemmatized_words = [lemmatizer.lemmatize(word) for word in words]
print("Lemmatized Words:", lemmatized_words)