Duckduckgo Download

This document outlines a Streamlit application that integrates various AI and web scraping functionalities, allowing users to interact with an AI chatbot and perform web searches. It includes features for searching DuckDuckGo, scraping webpage content, generating AI responses, and downloading content. The application utilizes LangChain components for managing chat history and processing text data.
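
A rough sketch of the setup this script assumes (inferred from the imports below, not stated in the source): the packages streamlit, requests, beautifulsoup4, duckduckgo-search, langchain, langchain-community, langchain-ollama, langchain-text-splitters, and faiss-cpu must be installed, and a local Ollama server must have the deepseek-r1 and nomic-embed-text models already pulled. The app is then launched with "streamlit run app.py".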


import streamlit as st

import requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
from langchain_ollama import ChatOllama, OllamaEmbeddings
# Updated import paths: ChatMessageHistory, FAISS, and the text splitter
# have moved out of the legacy langchain package
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Function to search DuckDuckGo
def duckduckgo_search(query, num_results=5):
    with DDGS() as ddgs:  # Use the DDGS class as a context manager
        results = [r for r in ddgs.text(query, max_results=num_results)]  # Perform search
    # Extract URLs from the results; each result dict stores its link under "href"
    return [result.get("href") for result in results] if results else []
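
# Example (hypothetical query): duckduckgo_search("streamlit tutorials", num_results=3)
# might return something like
#   ["https://docs.streamlit.io/...", "https://...", "https://..."]
# since each raw DDGS result is a dict with "title", "href", and "body" keys,
# which is why the helper above reads "href".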

# Function to scrape text from a webpage
def scrape_text(url):
    try:
        response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
        soup = BeautifulSoup(response.text, "html.parser")
        return " ".join([p.text for p in soup.find_all("p")])  # Extract text from <p> tags
    except Exception as e:
        # Return the error as text so one bad URL doesn't abort a batch scrape
        return f"Error scraping {url}: {e}"

# Function to download scraped content as a file
def download_content(content, filename):
    st.download_button(
        label="Download Content",
        data=content,
        file_name=filename,
        mime="text/plain"
    )
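
# Example (hypothetical): download_content("hello world", "notes.txt") renders
# a button that saves the string as notes.txt when clicked. Streamlit derives
# each widget's identity from its arguments, so the per-URL calls further
# below (distinct data and file_name) should not collide with one another.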

# Initialize the Ollama LLM model
llm = ChatOllama(model="deepseek-r1:latest")

# Initialize Streamlit UI
st.set_page_config(page_title="Enterprise AI Chatbot", layout="wide")

st.title("🚀 Enterprise AI Chatbot with LangChain & Streamlit")
st.write("Ask anything and interact with your AI assistant!")

# Chat history
if "history" not in st.session_state:
st.session_state.history = ChatMessageHistory()
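
# Note: Streamlit re-executes this script from the top on every interaction,
# so anything that must survive a rerun (the chat history here, and the URLs
# and retriever later) has to live in st.session_state.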

# User input
user_input = st.text_input("You:", key="user_input")

if st.button("Ask"):
    if user_input:
        # Add user input to history
        st.session_state.history.add_user_message(user_input)

        # Get AI response from the full message history
        ai_response = llm.invoke(st.session_state.history.messages)
        st.session_state.history.add_ai_message(ai_response.content)

# Display chat (top level, so the conversation re-renders on every rerun)
for msg in st.session_state.history.messages:
    if msg.type == "human":
        st.write(f"👤 You: {msg.content}")
    else:
        st.write(f"🤖 AI: {msg.content}")

# Web Crawling and Search
st.sidebar.header("Keyword-Based Web Crawler")
keyword = st.sidebar.text_input("Enter a keyword to search & scrape:")

if st.sidebar.button("Crawl & Search"):
    if keyword:
        st.write(f"🔍 Searching DuckDuckGo for: **{keyword}**...")
        urls = duckduckgo_search(keyword)

        if urls:
            st.write("✅ Found pages:", urls)

            # Store URLs in session state for later use
            st.session_state.urls = urls

            # Scrape content from the retrieved URLs
            scraped_texts = [scrape_text(url) for url in urls]

            # Split scraped text into chunks
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
            texts = text_splitter.create_documents(scraped_texts)

            # Create FAISS index; keep the retriever in session state, because
            # pressing any other button reruns the script and would otherwise
            # discard it before "Search in Scraped Data" can use it
            embeddings = OllamaEmbeddings(model="nomic-embed-text:latest")
            db = FAISS.from_documents(texts, embeddings)
            st.session_state.retriever = db.as_retriever()
        else:
            st.write("No search results found.")

# User searches inside the scraped content (only shown once an index exists)
if "retriever" in st.session_state:
    search_query = st.text_input("Search in scraped content:")

    if st.button("Search in Scraped Data") and search_query:
        docs = st.session_state.retriever.invoke(search_query)
        if docs:
            st.write("🔍 Retrieved Text:", docs[0].page_content[:300])
        else:
            st.write("No relevant results found.")

# Button to retrieve and display content from the websites found above
if "urls" in st.session_state:
    if st.sidebar.button("Retrieve Content from Websites"):
        st.write("📄 Retrieved Content from Websites:")
        for i, url in enumerate(st.session_state.urls):
            st.write(f"### Content from: {url}")
            content = scrape_text(url)
            st.write(content[:1000])  # Display the first 1000 characters of the content

            # Add a download button for each piece of scraped content
            download_content(content, f"scraped_content_{i + 1}.txt")
            st.write("---")

# Prompt-based AI interaction
st.sidebar.header("AI-Powered Prompt Generator")
topic = st.sidebar.text_input("Enter a topic for AI-generated content:")

if st.sidebar.button("Generate"):
prompt_template = ChatPromptTemplate.from_template("Tell me a joke about
{topic}")
chain = prompt_template | llm | StrOutputParser()
ai_output = chain.invoke({"topic": topic})
st.sidebar.write(f"📝 AI Response: {ai_output}")
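
# The "|" pipe above is LangChain's LCEL composition: the dict fills the
# {topic} slot in the prompt, ChatOllama returns a chat message, and
# StrOutputParser unwraps it into a plain string. A hypothetical call:
#   chain.invoke({"topic": "databases"})  # -> "Why did the database ..."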

st.sidebar.markdown("📌 **Enterprise AI solution using LangChain + Streamlit**")
