0% found this document useful (0 votes)
3 views1 page

NLP 2

Uploaded by

siblu khan
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
3 views1 page

NLP 2

Uploaded by

siblu khan
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/1

NLP Experiment 2

Program:
import nltk #importing nltk library
nltk.download('punkt') #downloading punkt for tokenization
from nltk.tokenize import word_tokenize #importing word_tokenize for tokenization

# Sample text data: a deliberately noisy sentence mixing words, digits and
# stray punctuation so the effect of the filtering step is visible.
# The literal is written as two adjacent string pieces (joined with a single
# space) so that it stays a valid, terminated string on every line.
text = (
    "My Name is saquib patel. This is sample 454545 text .yuswr9 ...jsd..,.fjk ,, "
    "@#$. ,,sdhjghj"
)
print("Input Text: \n", text)

# Tokenization: split the raw text into a list of tokens.
tokens = word_tokenize(text)
print("Tokens: \n", tokens)

# Filtering noise (numbers and special characters) and converting to lowercase.
# str.isalpha() is used instead of str.isalnum(): isalnum() also accepts
# digit-only tokens such as "454545", so numbers would not be filtered out.
filtered_tokens = [token.lower() for token in tokens if token.isalpha()]

print("Filtered Tokens:\n", filtered_tokens)
print("Filtered Text:\n", ' '.join(filtered_tokens), '\n')

Program 2:

import nltk
nltk.download('brown')
from nltk.corpus import brown

# Show the category labels that the Brown Corpus reader reports.
categories = brown.categories()
print("Categories in Brown Corpus:", categories)

# Fetch all file identifiers, but only display the first five of them.
file_ids = brown.fileids()
preview_ids = file_ids[:5]
print("File IDs in Brown Corpus:", preview_ids)

# Read the raw contents of the first listed file and print an excerpt
# limited to its first 500 characters.
first_file_id = file_ids[0]
text = brown.raw(first_file_id)
excerpt = text[:500]
print(excerpt)

You might also like