1. Write a Python program to perform the following tasks on text: a) Tokenization
a) Tokenization
# Sample text (the original assignment line was lost in extraction; any
# short sentence works for demonstrating whitespace tokenization).
text = "Natural language processing makes computers understand human language."

# a) Tokenization: naive whitespace split — punctuation stays attached to words.
tokens = text.split()
print("Tokens:", tokens)
# b) Tokenization + stopword removal with NLTK.
# NOTE(review): the original snippet never imported word_tokenize/stopwords
# and lost the sample text and the filtering step — reconstructed here.
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

nltk.download('punkt')
nltk.download('stopwords')

# Sample text
text = "This is a simple example showing tokenization and stopword removal."
tokens = word_tokenize(text)

# Build the English stopword set once, then keep only non-stopword tokens.
stop_words = set(stopwords.words('english'))
filtered_tokens = [t for t in tokens if t.lower() not in stop_words]

print("Tokens:", tokens)
print("Without stopwords:", filtered_tokens)
import re
class PorterStemmer:
    """Simplified Porter-style stemmer driven by ordered regex suffix rules.

    Each rule is a ``(pattern, replacement)`` pair; the FIRST rule whose
    pattern matches the end of the word is applied and the result returned.
    (The original extraction lost the ``stem`` method — reconstructed here
    around the surviving rule table.)
    """

    # Ordered suffix rules: earlier rules win over later ones.
    suffixes = [
        (r"(sses|ss)$", ""),
        (r"(ies|ied)$", "i"),
        (r"(ing|ed)$", ""),
        (r"(es|s)$", ""),
        (r"(ly|ness)$", ""),
        (r"(er|ful)$", ""),
    ]

    def stem(self, word):
        """Return the stem of *word* (lower-cased); unchanged if no rule fires."""
        word = word.lower()
        for pattern, replacement in self.suffixes:
            stemmed = re.sub(pattern, replacement, word)
            if stemmed != word:
                return stemmed
        return word

porter_stemmer = PorterStemmer()
a) Word Analysis
import string
class WordAnalyzer:
    """Compute simple statistics (length, vowel/consonant/unique counts) for a word."""

    def __init__(self):
        # Vowel alphabet used to classify characters (lowercase only;
        # analyze_word lower-cases its input first).
        self.vowels = "aeiou"

    def analyze_word(self, word):
        """Return a dict of statistics for *word* (case-insensitive).

        Non-alphabetic characters count toward length and uniqueness but
        are neither vowels nor consonants.
        """
        word = word.lower()
        word_length = len(word)
        vowels_count = sum(1 for char in word if char in self.vowels)
        consonants_count = sum(1 for char in word
                               if char in string.ascii_lowercase and char not in
                               self.vowels)
        unique_chars = len(set(word))  # original line was missing a closing ')'
        return {
            "Word": word,
            "Length": word_length,
            "Vowels": vowels_count,
            "Consonants": consonants_count,
            "Unique Characters": unique_chars,
        }
# Example usage (the word under analysis was lost in extraction — any word works).
word = "Programming"
word_analyzer = WordAnalyzer()
analysis = word_analyzer.analyze_word(word)
print("\nWord Analysis:")
for key, value in analysis.items():
    print(f"{key}: {value}")
b) Word Generation
import random
import string
def generate_word(length):
    """Return a pseudo-random lowercase word of exactly *length* letters.

    The original body was lost in extraction (it returned an undefined
    name); reconstructed as a uniform draw from ascii_lowercase.
    """
    letters = string.ascii_lowercase
    return "".join(random.choice(letters) for _ in range(length))

# Example usage
print("Generated word:", generate_word(6))
# Toy word-sense disambiguation: for each ambiguous word, tally which sense's
# clue word co-occurs with it in each sentence.
# NOTE(review): the dict/list literals and the loop structure were lost in
# extraction — this is a minimal coherent reconstruction of the surviving lines.
from collections import Counter

word_senses = {
    # ambiguous word -> {clue word found nearby: sense label}
    "bank": {"money": "financial institution", "river": "edge of a river"},
}
sentences = [
    "I deposited money at the bank.",
    "We sat on the bank of the river.",
]

sense_counter = Counter()
for word, senses in word_senses.items():
    for sentence in sentences:
        if word in sentence.lower():
            for clue, sense in senses.items():
                if clue in sentence.lower():
                    sense_counter[sense] += 1
                    print(f"Sentence: {sentence}")
                    print(f"Sense: {sense}")
                    print("-" * 50)
5. Install NLTK tool kit and perform stemming
# 5. Stemming with NLTK's PorterStemmer.
# NOTE(review): imports of PorterStemmer/word_tokenize and the stemming loop
# were lost in extraction — reconstructed here.
import nltk
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize

nltk.download('punkt')

stemmer = PorterStemmer()
# Sample text
text = "The cats were playing with the scratched balls, and they enjoyed the games."
words = word_tokenize(text)
# Perform stemming
stemmed_words = [stemmer.stem(w) for w in words]
print("Original:", words)
print("Stemmed:", stemmed_words)
# 6. POS tagging with NLTK.
# NOTE(review): the pos_tag import and the body of find_pos were lost in
# extraction — reconstructed here.
import nltk
from nltk import pos_tag

nltk.download('averaged_perceptron_tagger')

words = ["run", "quickly", "dog", "happily", "under", "the", "sky", "ate", "jump", "beautiful"]
tagged_words = pos_tag(words)

def find_pos(word):
    """Return the POS tag assigned to *word* in tagged_words, or None if absent."""
    for w, tag in tagged_words:
        if w.lower() == word.lower():
            return tag
    return None

# Example usage
print(find_pos("dog"))
print(find_pos("run"))
7. Write a Python program to perform stemming and lemmatization on text
# 7. Stemming vs. lemmatization side by side.
# NOTE(review): the tool imports, sample text, and the two list builds were
# lost in extraction — reconstructed here.
import nltk
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.tokenize import word_tokenize

nltk.download('punkt')
nltk.download('wordnet')

stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()

# Sample text
text = "The children were running faster than the wolves"
words = word_tokenize(text)

stemmed = [stemmer.stem(w) for w in words]
lemmatized = [lemmatizer.lemmatize(w) for w in words]

# Display results
print("Stemmed:", stemmed)
print("Lemmatized:", lemmatized)
b) Generate n-grams using NLTK N-Grams library
# b) Generate n-grams with nltk.util.ngrams.
# NOTE(review): the ngrams import, sample text, and the bigram/trigram builds
# were lost in extraction — reconstructed here.
import nltk
from nltk.tokenize import word_tokenize
from nltk.util import ngrams

nltk.download('punkt')

# Sample text
text = "The quick brown fox jumps over the lazy dog."
tokens = word_tokenize(text)

bigrams = list(ngrams(tokens, 2))
trigrams = list(ngrams(tokens, 3))
print("Bigrams:", bigrams)
print("Trigrams:", trigrams)
c) Implement N-Grams Smoothing
# c) N-gram probabilities with Laplace (add-one) smoothing.
# NOTE(review): the Counter/ngrams imports, the n-gram list construction, and
# the smoothing formula were lost in extraction — reconstructed here.
import nltk
from collections import Counter
from nltk.tokenize import word_tokenize
from nltk.util import ngrams

nltk.download('punkt')

# Sample text
text = "The quick brown fox jumps over the lazy dog."
tokens = word_tokenize(text)
n = 2

# Generate n-grams
ngram_list = list(ngrams(tokens, n))
ngram_counts = Counter(ngram_list)
total_ngrams = len(ngram_list)
vocab_size = len(set(tokens))

# Laplace (Add-one) smoothing
def laplace_smoothing(ngram):
    """Add-one smoothed probability: (count + 1) / (total n-grams + |V|)."""
    return (ngram_counts[ngram] + 1) / (total_ngrams + vocab_size)

for bigram in ngram_counts:
    smoothed_prob = laplace_smoothing(bigram)
    print(bigram, "->", smoothed_prob)