0% found this document useful (0 votes)
18 views

Program 1

Uploaded by

mickeypinky123
Copyright
© All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
18 views

Program 1

Uploaded by

mickeypinky123
Copyright
© All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 2

Program 1: Counting words

import nltk
from nltk.corpus import gutenberg
import string
# Fetch only the Gutenberg corpus; a bare nltk.download() opens the
# interactive downloader instead of fetching a specific package.
nltk.download('gutenberg')
# Example: Load the text of 'Moby Dick'
raw_text = gutenberg.raw('melville-moby_dick.txt')
import re

def preprocess_text(text):
    """Normalize raw text and split it into word tokens.

    Every character in ``string.punctuation`` (ASCII punctuation) is
    deleted, the text is lowercased, and the result is split on runs of
    whitespace. Because punctuation is deleted rather than replaced by a
    space, "don't" becomes "dont" and "well-known" becomes "wellknown".
    Non-ASCII punctuation (e.g. curly quotes) is left in place.

    Args:
        text: The raw text to process.

    Returns:
        A list of lowercase tokens; an empty list when ``text`` contains
        nothing but whitespace and ASCII punctuation.
    """
    # Remove punctuation in one pass using a deletion-only translation table.
    text = text.translate(str.maketrans('', '', string.punctuation))

    # Convert to lowercase
    text = text.lower()

    # Tokenize on whitespace
    return text.split()

# Normalize and tokenize the raw corpus text into a list of words.
processed_text = preprocess_text(raw_text)
def calculate_statistics(tokens):
    """Return summary counts for a tokenized document.

    Args:
        tokens: A sized collection of hashable tokens (e.g. a list of
            word strings).

    Returns:
        A tuple ``(document_count, word_count, unique_word_count)`` where
        ``document_count`` is always 1, ``word_count`` is the total number
        of tokens, and ``unique_word_count`` is the number of distinct
        tokens.
    """
    # Assuming one document for simplicity
    document_count = 1

    word_count = len(tokens)
    unique_word_count = len(set(tokens))

    return document_count, word_count, unique_word_count

# Compute the statistics for the tokenized text. The call is wrapped in
# parentheses so the assignment can span lines without a syntax error.
document_count, word_count, unique_word_count = calculate_statistics(
    processed_text
)

# Report the results.
print("Document Count:", document_count)
print("Word Count:", word_count)
print("Unique Word Count:", unique_word_count)

You might also like