Omkar Nimbalkar Ass3
import re
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from heapq import nlargest
# Download punkt data if not already downloaded
nltk.download('punkt')
# Input text (the rest of the paragraph is truncated in the source)
text = """Artificial Intelligence (AI) is transforming industries by automating tasks, ..."""
# Tokenize sentences on the original text, before stripping punctuation,
# since sent_tokenize relies on sentence-ending characters
sentences = sent_tokenize(text)
# Preprocess: remove special characters and digits for word counting
cleaned_text = re.sub(r'[^a-zA-Z\s]', '', text)
# Tokenize words and calculate word frequencies
word_frequencies = {}
for word in word_tokenize(cleaned_text.lower()):
    if word.isalpha():  # Only count words (ignore numbers and punctuation)
        word_frequencies[word] = word_frequencies.get(word, 0) + 1
# Score sentences based on word frequency
sentence_scores = {
    sent: sum(word_frequencies.get(word.lower(), 0) for word in word_tokenize(sent))
    for sent in sentences
}
# Extract top 3 sentences for a more concise summary
summary_sentences = nlargest(3, sentence_scores, key=sentence_scores.get)
# Join the summary sentences
summary = ' '.join(summary_sentences)
# Clean summary (spacing fix, restore full forms, etc.)
summary = summary.replace("AI", "Artificial Intelligence")  # Ensure proper usage of full form
summary = summary.replace("datadriven", "data-driven")  # Fix hyphenated words
summary = summary.replace("selfdriving", "self-driving")  # Fix hyphenated words
summary = summary.replace("Machine learning", "Machine Learning")  # Capitalize Machine Learning
# Remove extra 'Artificial Intelligence' mentions
summary = summary.replace("Artificial Intelligence Artificial Intelligence", "Artificial Intelligence")
# Display the final summary
print("Summary:", summary)
In [14]: #SETA 3
from textblob import TextBlob
messages = [
"I purchased headphones online. I am very happy with the product.",
"I saw the movie yesterday. The animation was really good but the script was ok.",
"I enjoy listening to music",
"I take a walk in the park everyday"
]
for msg in messages:
    sentiment = TextBlob(msg).sentiment.polarity
    sentiment_label = "Positive" if sentiment > 0 else "Negative" if sentiment < 0 else "Neutral"
    print(f"Message: {msg}\nSentiment: {sentiment_label}\n")
Message: I saw the movie yesterday. The animation was really good but the script was ok.
Sentiment: Positive
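TextBlob also exposes a subjectivity score alongside polarity; a small sketch (not in the original) that prints both:

# Polarity is in [-1, 1]; subjectivity is in [0, 1] (0 = objective, 1 = subjective)
for msg in messages:
    blob = TextBlob(msg)
    print(f"{msg!r}: polarity={blob.sentiment.polarity:.2f}, "
          f"subjectivity={blob.sentiment.subjectivity:.2f}")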
Tokenized Sentences: ['[12/02/24, 10:30 AM] John: Hey, how are you?', "[12/02/24, 10:31 AM] Mike: I'm good, just working on a project.", '[12/02/24, 10:35 AM] John: Nice!', 'Need any help?', '[12/02/24, 10:40 AM] Mike: Yeah, I need some suggestions on data visualization.']
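The cell that produced this output is not included in the export; a sketch that could plausibly produce it with sent_tokenize, assuming the chat transcript is stored as a single string (the variable name chat_text is hypothetical):

# Sentence-tokenize a WhatsApp-style chat transcript
chat_text = ("[12/02/24, 10:30 AM] John: Hey, how are you? "
             "[12/02/24, 10:31 AM] Mike: I'm good, just working on a project. "
             "[12/02/24, 10:35 AM] John: Nice! Need any help? "
             "[12/02/24, 10:40 AM] Mike: Yeah, I need some suggestions on data visualization.")
print("Tokenized Sentences:", sent_tokenize(chat_text))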
In [18]: #SETB 1
import pandas as pd
from wordcloud import WordCloud
import matplotlib.pyplot as plt
df = pd.read_csv(r"C:\Users\Practical\Desktop\instagram_global_top_1000.csv")
print(df.columns)
Empty DataFrame
Columns: [Account, Followers]
Index: []

     Account  Followers
747  yooncy1  2800000.0
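The WordCloud and matplotlib imports above are never used in this cell; the plotting step appears to be missing from the export. A minimal sketch that would render a word cloud from the Account column (assuming that column holds the account names):

# Join all account names into one string and render a word cloud
account_text = ' '.join(df['Account'].astype(str))
wc = WordCloud(width=800, height=400, background_color='white').generate(account_text)
plt.figure(figsize=(10, 5))
plt.imshow(wc, interpolation='bilinear')
plt.axis('off')
plt.show()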
In [28]: #SETB 2
df = pd.read_csv(r"C:\Users\Practical\Desktop\covid_2021_1.csv", encoding='ISO-8859-1
from textblob import TextBlob
def analyze_sentiment(text):
polarity = TextBlob(str(text)).sentiment.polarity
return "Positive" if polarity > 0 else "Negative" if polarity < 0 else "Neutral"
df['Sentiment'] = df['comment_text'].apply(analyze_sentiment)
print(df['Sentiment'].value_counts())
Sentiment
Neutral 44657
Positive 12044
Negative 4243
Name: count, dtype: int64
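As a quick visual check, the counts above can be plotted using the matplotlib import from SETB 1; a small sketch (not part of the original):

# Bar chart of the sentiment distribution; colors follow value_counts'
# descending order (Neutral, Positive, Negative)
df['Sentiment'].value_counts().plot(kind='bar', color=['grey', 'green', 'red'])
plt.title('Sentiment distribution of COVID comments')
plt.xlabel('Sentiment')
plt.ylabel('Number of comments')
plt.show()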
In [29]: #SETB 3
df = pd.read_csv("INvideos.csv")
# Total views, likes, dislikes, comments
print(df[['views', 'likes', 'dislikes', 'comment_count']].sum())
# Least and Most Liked Videos
print(df.nsmallest(1, 'likes')[['title', 'likes']])
print(df.nlargest(1, 'likes')[['title', 'likes']])
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
Cell In[29], line 3
      1 #SETB 3
----> 3 df = pd.read_csv("INvideos.csv")
      5 # Total views, likes, dislikes, comments
      6 print(df[['views', 'likes', 'dislikes', 'comment_count']].sum())

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\io\parsers\readers.py:1026, in read_csv(filepath_or_buffer, sep, delimiter, header, names, (...))
-> 1026 return _read(filepath_or_buffer, kwds)

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\io\parsers\readers.py:620, in _read(filepath_or_buffer, kwds)
--> 620 parser = TextFileReader(filepath_or_buffer, **kwds)

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\io\parsers\readers.py:1620, in TextFileReader.__init__(self, f, engine, **kwds)
-> 1620 self._engine = self._make_engine(f, self.engine)

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\io\parsers\readers.py:1880, in TextFileReader._make_engine(self, f, engine)
-> 1880 self.handles = get_handle((...))
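The traceback shows that INvideos.csv is missing from the working directory. A minimal sketch (not in the original) that checks for the file before reading, so the cell fails with a clear message instead of a traceback:

from pathlib import Path

# Adjust this to wherever the dataset actually lives
csv_path = Path("INvideos.csv")
if csv_path.exists():
    df = pd.read_csv(csv_path)
    print(df[['views', 'likes', 'dislikes', 'comment_count']].sum())
    print(df.nsmallest(1, 'likes')[['title', 'likes']])
    print(df.nlargest(1, 'likes')[['title', 'likes']])
else:
    print(f"File not found: {csv_path.resolve()}")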
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
Cell In[33], line 7
      4 import json
      6 # Load JSON
----> 7 with open("your_posts.json", "r", encoding="utf-8") as file:
      8     data = json.load(file)
     10 df = pd.DataFrame(data)

File C:\ProgramData\anaconda3\Lib\site-packages\IPython\core\interactiveshell.py:324, in _modified_open(file, *args, **kwargs)
    317 if file in {0, 1, 2}:
    318     raise ValueError(
    319         f"IPython won't let you open fd={file} by default "
    320         "as it is likely to crash IPython. If you know what you are doing, "
    321         "you can use builtins' open."
    322     )
--> 324 return io_open(file, *args, **kwargs)
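The cell that raised this error (In [33]) is not included in the export; from the traceback it loads exported posts from your_posts.json into a DataFrame. A minimal reconstruction with a guard for the missing file (the df.head() print is an assumption):

import json

try:
    # Load the exported posts and build a DataFrame, as the traceback shows
    with open("your_posts.json", "r", encoding="utf-8") as file:
        data = json.load(file)
    df = pd.DataFrame(data)
    print(df.head())
except FileNotFoundError:
    print("your_posts.json not found; place the exported file in the working directory.")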