Gen AIL

The document contains multiple Python programs that utilize various libraries for natural language processing tasks. These tasks include word embeddings, sentiment analysis, text summarization, and generating text based on user input. Each program demonstrates specific functionalities such as loading models, performing analogies, visualizing word embeddings, and fetching details from Wikipedia.


Program 1:

import gensim.downloader as api

print("Loading model...")
model = api.load("word2vec-google-news-300")
print("Model loaded!")

def analogy(pos, neg=[]):
    try:
        return model.most_similar(positive=pos, negative=neg, topn=5)
    except KeyError as e:
        return f"Word not in vocab: {e}"

def display(result, title):
    print(f"\n{title}")
    if isinstance(result, str):
        print(result)
    else:
        for w, s in result:
            print(f"{w}: {s:.4f}")

# Example: king - man + woman
display(analogy(["woman", "king"], ["man"]), "Example 1: King - Man + Woman = ?")

# Custom input
w1, w2, w3 = [input(f"Enter word {i+1}: ").strip() for i in range(3)]
display(analogy([w1, w3], [w2]), f"Result: {w1} - {w2} + {w3} = ?")
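
Note: the analogy above is plain vector arithmetic on the embeddings. As a minimal sketch (reusing the model object loaded above), the first example can be reproduced by hand with gensim's similar_by_vector; unlike most_similar, this variant does not filter the query words out of the results.

# Sketch only: manual king - man + woman query against the loaded model
vec = model["king"] - model["man"] + model["woman"]
print(model.similar_by_vector(vec, topn=5))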


Program 2:

import numpy as np, matplotlib.pyplot as plt, gensim.downloader as api
from sklearn.decomposition import PCA

print("Loading model..."); model = api.load("glove-wiki-gigaword-300")

words = ["computer", "software", "algorithm", "data", "network", "robot", "artificial",
         "internet", "cloud", "encryption"]
vecs = np.array([model[w] for w in words if w in model])
reduced = PCA(n_components=2).fit_transform(vecs)
# Use TSNE instead: from sklearn.manifold import TSNE; reduced = TSNE(n_components=2).fit_transform(vecs)

plt.figure(figsize=(10, 8))
for w, (x, y) in zip(words, reduced):
    plt.scatter(x, y); plt.text(x, y, w, fontsize=9)
plt.title("2D Word Embedding Visualization (PCA)"); plt.xlabel("Comp 1"); plt.ylabel("Comp 2")
plt.show()

def similar(word, topn=5):
    try: return model.most_similar(word, topn=topn)
    except KeyError: return f"'{word}' not in vocabulary."

inp = "algorithm"
result = similar(inp)
print(f"\nTop 5 words similar to '{inp}':")
if isinstance(result, list):
    for w, s in result:
        print(f"{w}: {s:.4f}")
else:
    print(result)


Program 3:
import nltk, gensim.downloader as api
from gensim.models import Word2Vec
from nltk.tokenize import word_tokenize

nltk.download('punkt')

# Save corpus
corpus = """<PASTE_MEDICAL_TEXT_HERE>"""  # Replace this with the full medical text
with open("medical.txt", "w") as f:
    f.write(corpus)

# Load and preprocess
with open("medical.txt", "r") as f:
    text = f.read().lower()
sentences = [word_tokenize(s) for s in text.split('.') if s]

# Train model
model = Word2Vec(sentences, vector_size=100, window=5, min_count=2, workers=4, epochs=20)
model.save("custom_word2vec.model")

def show_similar(m, word):
    if word in m.wv:
        print(f"Similar to '{word}':")
        for w, s in m.wv.most_similar(word, topn=5):
            print(f"{w}: {s:.4f}")
    else:
        print(f"'{word}' not in vocab")

show_similar(model, "cell")

# Compare with GloVe
glove = api.load("glove-wiki-gigaword-300")
print("\nGeneral model results for 'cell':")
for w, s in glove.most_similar("cell", topn=5):
    print(f"{w}: {s:.4f}")


Program 4:
import nltk, string, gensim.downloader as api
from transformers import pipeline
from nltk.tokenize import word_tokenize

nltk.download('punkt', quiet=True)
vectors = api.load("glove-wiki-gigaword-100")
generator = pipeline("text-generation", model="gpt2")

def enrich_prompt(prompt, keyword, vectors):
    words = word_tokenize(prompt)
    enriched = []
    for word in words:
        w = word.lower().strip(string.punctuation)
        if w == keyword.lower():
            try:
                sim = vectors.most_similar(w, topn=1)[0][0]
                print(f"Replacing '{word}' with '{sim}'")
                enriched.append(sim)
                continue
            except KeyError:
                print(f"'{w}' not in vocab.")
        enriched.append(word)
    return ' '.join(enriched)

def generate(text):
    return generator(text, max_length=100, num_return_sequences=1)[0]['generated_text']

# Inputs
original = "Who is king?"
key = "king"
enriched = enrich_prompt(original, key, vectors)

# Generate
print("\nOriginal Response:\n", (o := generate(original)))
print("\nEnriched Response:\n", (e := generate(enriched)))

# Compare
print("\n--- Comparison ---")
print(f"Original Length: {len(o)} | Periods: {o.count('.')}")
print(f"Enriched Length: {len(e)} | Periods: {e.count('.')}")

Program 5:
import nltk, random, string
from collections import Counter
import gensim.downloader as api

nltk.download('punkt', quiet=True)
vectors = api.load("glove-wiki-gigaword-100")

def get_similar(word, topn=250):
    try:
        return [w for w, _ in vectors.most_similar(word, topn=topn)]
    except KeyError:
        return []

def generate_sentence(seed, words):
    templates = [
        f"The {seed} was surrounded by {words[0]} {words[1]}.",
        f"People often associate {seed} with {words[2]} {words[3]}.",
        f"In the land of {seed}, {words[4]} was a common sight.",
        f"A story about {seed} includes {words[1]} and {words[3]}."
    ]
    return random.choice(templates)

def generate_paragraph(seed):
    sim = get_similar(seed)
    if not sim:
        return "No similar words found."
    return ' '.join(generate_sentence(seed, sim) for _ in range(249))

def count_freq(text):
    text = text.lower().translate(str.maketrans('', '', string.punctuation))
    return Counter(text.split())

# Run it
seed = input("Enter a seed word: ").strip()
para = generate_paragraph(seed)
print("\nGenerated Paragraph:\n", para)

freq = count_freq(para)
print("\nTop 10 Word Frequencies:")
for w, c in freq.most_common(10):
    print(f"{w}: {c}")

Program 6:
from transformers import pipeline

analyze = pipeline("sentiment-analysis")

texts = [
    "I love this product! It's absolutely fantastic.",
    "This is the worst experience I've ever had.",
    "Not bad, could be better but I'm satisfied.",
    "I'm feeling neutral about this."
]

for text, result in zip(texts, analyze(texts)):
    print(f"Text: {text}\nSentiment: {result['label']}, Score: {result['score']:.4f}\n")


Program 7:
from transformers import pipeline

summarize = pipeline("summarization")

text = """Artificial intelligence (AI) is a rapidly growing field transforming industries.
It includes language understanding, image recognition, decision-making, and more.
AI is used in healthcare, finance, and transport, and is growing more efficient with deep learning.
However, it raises concerns like bias, privacy, and job loss, which must be addressed."""

summary = summarize(text, max_length=100, min_length=30, do_sample=False)[0]['summary_text']
print("Summary:\n", summary)

Program 8:
from langchain import PromptTemplate
from langchain.llms import Cohere

text = """Artificial intelligence is transforming industries with new capabilities and efficiencies.
However, ethical concerns and job displacement are major challenges.
The future holds great promise, but must be managed responsibly."""

cohere_llm = Cohere(cohere_api_key="your-cohere-key", model="command")  # Replace key

template = """You are an AI assistant. Analyze the following text:

{text}

Summary:
- Concise summary

Key Takeaways:
- Three key points

Sentiment:
- Positive, Negative, or Neutral"""

prompt = PromptTemplate(input_variables=["text"], template=template)
formatted_prompt = prompt.format(text=text)
response = cohere_llm.predict(formatted_prompt)
print("Final Output:\n", response)

Program 9:
!pip install wikipedia-api pydantic ipywidgets

from pydantic import BaseModel
from typing import List, Optional
import wikipediaapi
import ipywidgets as widgets
from IPython.display import display

class InstitutionDetails(BaseModel):
    founder: Optional[str]
    founded: Optional[str]
    branches: Optional[List[str]]
    number_of_employees: Optional[int]
    summary: Optional[str]

def fetch_institution_details(name: str) -> InstitutionDetails:
    wiki = wikipediaapi.Wikipedia(user_agent="Colab/1.0", language='en')
    page = wiki.page(name)
    if not page.exists():
        raise ValueError(f"No Wikipedia page found for '{name}'.")
    founder = founded = None
    branches = []
    employees = None
    for line in page.text.splitlines():
        l = line.lower()
        if 'founder' in l or 'founded' in l or 'branches' in l or 'number of employees' in l:
            k, _, v = line.partition('=')
            val = v.strip()
            if 'founder' in l: founder = val
            elif 'founded' in l: founded = val
            elif 'branches' in l: branches = [b.strip() for b in val.split(',')]
            elif 'employees' in l:
                try: employees = int(val.replace(',', ''))
                except ValueError: pass
    return InstitutionDetails(
        founder=founder, founded=founded,
        branches=branches or None,
        number_of_employees=employees,
        summary=page.summary[:500]
    )

def display_details(d: InstitutionDetails):
    print(f"Founder: {d.founder or 'N/A'}")
    print(f"Founded: {d.founded or 'N/A'}")
    print(f"Branches: {', '.join(d.branches) if d.branches else 'N/A'}")
    print(f"Number of Employees: {d.number_of_employees or 'N/A'}")
    print(f"Summary: {d.summary or 'N/A'}")

def on_click(b):
    name = input_box.value.strip()
    if not name:
        print("Enter an institution name.")
        return
    try:
        display_details(fetch_institution_details(name))
    except ValueError as e:
        print(e)

input_box = widgets.Text(placeholder='Enter institution name', description='Institution:')
btn = widgets.Button(description='Fetch Details', icon='search')
btn.on_click(on_click)
display(input_box, btn)
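
The ipywidgets UI only renders inside a notebook; from a plain script the helpers can be called directly. A usage sketch (the institution name is just an example, any page title works):

details = fetch_institution_details("Massachusetts Institute of Technology")
display_details(details)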

Program 10:

!pip install langchain cohere wikipedia-api pydantic langchain_community --quiet

from langchain import PromptTemplate
from langchain.llms import Cohere
from pydantic import BaseModel
from typing import Optional
import wikipediaapi, getpass
from IPython.display import display
import ipywidgets as widgets

COHERE_API_KEY = getpass.getpass('Enter your Cohere API Key: ')
llm = Cohere(cohere_api_key=COHERE_API_KEY, model="command")

def fetch_ipc_summary():
    wiki = wikipediaapi.Wikipedia(user_agent="IPCChatbot/1.0", language='en')
    page = wiki.page("Indian Penal Code")
    if not page.exists():
        raise ValueError("IPC page not found.")
    return page.text[:5000]

ipc_text = fetch_ipc_summary()

class IPCResponse(BaseModel):
    section: Optional[str]
    explanation: Optional[str]

prompt = PromptTemplate(
    input_variables=["ipc_content", "question"],
    template="""
You are a legal assistant specialized in the Indian Penal Code. Refer to the content below to answer:

{ipc_content}

Question: {question}

Respond with a detailed answer, citing relevant sections if applicable.
"""
)

def get_ipc_response(q: str) -> IPCResponse:
    r = llm.predict(prompt.format(ipc_content=ipc_text, question=q))
    if "Section" in r:
        sec = r.split('Section')[1].split(':')[0].strip()
        exp = r.split(':', 1)[-1].strip()
    else:
        sec, exp = None, r.strip()
    return IPCResponse(section=sec, explanation=exp)

def display_response(res: IPCResponse):
    print(f"Section: {res.section or 'N/A'}")
    print(f"Explanation: {res.explanation}")

def on_click(b):
    q = input_box.value
    try:
        display_response(get_ipc_response(q))
    except Exception as e:
        print(f"Error: {e}")

input_box = widgets.Text(placeholder='Ask about IPC', description='You:')
ask_btn = widgets.Button(description='Ask', icon='legal')
ask_btn.on_click(on_click)
display(input_box, ask_btn)
