# Program 1
import nltk
from nltk.corpus import gutenberg
import string
# Fetch only the Gutenberg corpus. A bare nltk.download() call opens the
# interactive downloader, which stalls a script run non-interactively.
nltk.download('gutenberg')
# Example: Load the text of 'Moby Dick'
raw_text = gutenberg.raw('melville-moby_dick.txt')
import re
def preprocess_text(text):
    """Normalize raw text into a list of lowercase word tokens.

    Every character in ``string.punctuation`` (ASCII punctuation) is deleted,
    the remainder is lowercased, and the result is split on runs of
    whitespace.

    Because punctuation is deleted rather than replaced by a space, words
    joined only by punctuation are merged: ``"stop--now"`` becomes
    ``"stopnow"`` and ``"don't"`` becomes ``"dont"``.

    Args:
        text: The string to tokenize.

    Returns:
        A list of lowercase tokens; empty when ``text`` contains no
        non-whitespace, non-punctuation characters.
    """
    # Remove punctuation in a single pass with a deletion-only table.
    text = text.translate(str.maketrans('', '', string.punctuation))
    # Convert to lowercase
    text = text.lower()
    # Tokenize on whitespace; split() with no argument also drops empty
    # strings produced by leading, trailing, or repeated whitespace.
    tokens = text.split()
    return tokens
# Tokenize the loaded corpus text for the steps that follow.
processed_text = preprocess_text(text=raw_text)
def calculate_statistics(tokens):
# Assuming one document for simplicity
document_count = 1
word_count = len(tokens)
unique_words = set(tokens)
unique_word_count = len(unique_words)