0% found this document useful (0 votes)
12 views7 pages

App

The document contains a Flask application that serves as a file management and summarization tool. It supports various file formats, allowing users to search, open, and summarize files using an AI model. The application includes a web interface with functionalities for text extraction, database searching, and real-time summary streaming.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
12 views7 pages

App

The document contains a Flask application that serves as a file management and summarization tool. It supports various file formats, allowing users to search, open, and summarize files using an AI model. The application includes a web interface with functionalities for text extraction, database searching, and real-time summary streaming.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1 / 7

backend code:

import os
import re
import sqlite3
import subprocess
import sys
import threading
import webbrowser
from contextlib import closing

import markdown
import pandas as pd
import psutil
import torch
from docx import Document
from flask import (
    Flask,
    Response,
    jsonify,
    render_template,
    request,
    stream_with_context,
)
from pptx import Presentation
from PyPDF2 import PdfReader
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextIteratorStreamer

# Initialize Flask app
# Templates and static assets are looked up next to the executable when the
# program runs as a frozen bundle (``sys.frozen`` is truthy), and next to this
# source file otherwise.
if getattr(sys, 'frozen', False):
    base_path = os.path.dirname(sys.executable)
else:
    # abspath() keeps the folder stable even if __file__ is a relative path.
    base_path = os.path.dirname(os.path.abspath(__file__))

app = Flask(
    __name__,
    template_folder=os.path.join(base_path, 'templates'),
    static_folder=os.path.join(base_path, 'static'),
)

def run():
    """Start ``app/app.exe`` unless a process named ``app.exe`` already exists.

    The executable is looked up under the current working directory. When a
    matching process is found, a message is printed and nothing is launched.
    """
    exe_name = "app.exe"  # compared against process names, not full paths
    already_running = any(
        proc.info['name'] == exe_name
        for proc in psutil.process_iter(attrs=['name'])
    )
    if already_running:
        print(f"{exe_name} is already running.")
        return

    exe_path = os.path.join(os.getcwd(), "app", "app.exe")
    subprocess.Popen([exe_path])

def open_browser():
    """Open the locally served web UI in a new browser window."""
    # Point straight at the local development server. The previous literal
    # routed the address through an unrelated external host
    # ("fanyv88.com/..."), so the browser never reached this application.
    webbrowser.open_new("http://127.0.0.1:5000")

# Initialize AI model
print(f"GPU available: {torch.cuda.is_available()}")
device = "cuda" if torch.cuda.is_available() else "cpu"

# Local checkpoint directory. The default is the original hard-coded path;
# setting SUMMARIZER_MODEL_PATH lets other machines point at their own copy.
model_name = os.environ.get(
    "SUMMARIZER_MODEL_PATH",
    r"C:\Users\nick\DeepSeek-R1-Distill-Qwen-1.5B",
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
# Placement is done once, explicitly, with .to(device). The earlier code also
# passed device_map="auto", which delegates placement to the loader and then
# moved the already-placed model again; keeping a single mechanism guarantees
# the model lives on the same `device` the request handler sends inputs to.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
).to(device)

def _read_plain_text(file_path):
    """Read *file_path* as UTF-8 text, dropping a leading byte-order mark."""
    # 'utf-8-sig' decodes BOM-less files exactly like 'utf-8' but strips the
    # BOM that some Windows editors prepend, so it does not leak into prompts.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        return f.read()


def _read_pdf(file_path):
    """Concatenate the extractable text of every page of a PDF."""
    reader = PdfReader(file_path)
    # extract_text() may yield nothing for a page; treat that as empty text.
    return "".join(page.extract_text() or "" for page in reader.pages)


def _read_word(file_path):
    """Join the non-blank paragraphs of a Word document with newlines."""
    doc = Document(file_path)
    return "\n".join(para.text for para in doc.paragraphs if para.text.strip())


def _read_powerpoint(file_path):
    """Join the text of every text-bearing shape on every slide."""
    prs = Presentation(file_path)
    return "\n".join(
        shape.text
        for slide in prs.slides
        for shape in slide.shapes
        if hasattr(shape, "text")
    )


def _read_excel(file_path):
    """Render every worksheet as a titled plain-text table."""
    sheets = pd.read_excel(file_path, sheet_name=None)
    return "\n\n".join(
        f"Sheet: {name}\n{df.to_string(index=False)}"
        for name, df in sheets.items()
    )


# Extension -> reader. Anything not listed here is read as plain text.
# NOTE(review): the legacy '.doc' / '.ppt' extensions are routed to the same
# readers as their OOXML counterparts, as before; those libraries presumably
# reject the old binary formats, in which case get_file_text returns None.
_TEXT_EXTRACTORS = {
    '.pdf': _read_pdf,
    '.doc': _read_word,
    '.docx': _read_word,
    '.ppt': _read_powerpoint,
    '.pptx': _read_powerpoint,
    '.xls': _read_excel,
    '.xlsx': _read_excel,
}


def get_file_text(file_path):
    """Extract the textual content of a file, chosen by its extension.

    Args:
        file_path: Path of the file to read. The extension is matched
            case-insensitively; unknown extensions (and ``.txt``) are read
            as UTF-8 text.

    Returns:
        The extracted text, or ``None`` if the file could not be read or
        parsed (the error is printed, not raised).
    """
    ext = os.path.splitext(file_path)[1].lower()
    extractor = _TEXT_EXTRACTORS.get(ext, _read_plain_text)
    try:
        return extractor(file_path)
    except Exception as e:
        # Deliberate best-effort boundary: callers treat None as "unreadable".
        print(f"Error reading file ({ext}): {e}")
        return None

def search_database(query):
    """Return indexed files whose name or type contains *query*.

    Runs a parameterized ``LIKE`` search against the ``files`` table of
    ``file_database.db`` (opened relative to the current working directory).

    Args:
        query: Substring to search for. It is wrapped in ``%`` wildcards;
            ``%`` and ``_`` inside it keep their SQL ``LIKE`` meaning.

    Returns:
        A list of ``(id, file_name, path, file_type)`` tuples.

    Raises:
        sqlite3.Error: If the database cannot be opened or queried.
    """
    pattern = f"%{query}%"
    # closing() guarantees the connection is released even when the query
    # raises (for example, when the table is missing).
    with closing(sqlite3.connect(r"file_database.db")) as conn:
        cursor = conn.cursor()
        cursor.execute(
            """SELECT id, file_name, path, file_type FROM files
               WHERE file_name LIKE ? OR file_type LIKE ?""",
            (pattern, pattern),
        )
        return cursor.fetchall()

@app.route('/')
def index():
    """Render the ``index.html`` template for the root URL."""
    landing_page = render_template('index.html')
    return landing_page

@app.route('/search')
def search():
    """Render ``result.html`` with the matches for the ``q`` query parameter.

    A missing or empty ``q`` skips the database lookup and renders the
    template with an empty result list.
    """
    query = request.args.get('q', '')
    if not query:
        results = []
    else:
        results = search_database(query)
    return render_template('result.html', results=results)

@app.route('/open/<path:file_path>')
def open_file(file_path):
    """Open *file_path* on the host with its associated application.

    Returns:
        ``204`` with an empty body on success, ``404`` if the path does not
        exist, or ``500`` with the error text if the launch fails.
    """
    # NOTE(security): file_path comes straight from the URL and is handed to
    # the operating system, so any request that reaches this server can launch
    # any file on the machine. Consider restricting it to paths present in the
    # file index before exposing this beyond localhost.
    if not os.path.exists(file_path):
        # Report a missing target as "not found" rather than a server error.
        return Response("File not found", status=404, mimetype="text/plain")
    try:
        os.startfile(file_path)  # only available on Windows
    except Exception as e:
        # The message can contain the requested path; send it as plain text so
        # the browser never interprets request-controlled content as HTML.
        return Response(str(e), status=500, mimetype="text/plain")
    return "", 204

@app.route('/summary/<path:file_path>')
def summary_file(file_path):
    """Render ``summary.html`` for *file_path*.

    The template receives the file's base name as ``file_name`` and the
    unmodified path as ``file_path``.
    """
    display_name = os.path.basename(file_path)
    return render_template(
        'summary.html',
        file_name=display_name,
        file_path=file_path,
    )

@app.route('/stream-summary', methods=['POST'])
def stream_summary():
    """Stream a model-generated summary of the posted file as plain text.

    Expects a form field ``file_path``. The file's text is extracted with
    ``get_file_text`` and wrapped in a summarization prompt; generation runs
    on a background thread while the decoded text is forwarded to the client
    as it is produced.

    Returns:
        A ``400`` response with ``"Error processing file"`` if no text could
        be extracted, otherwise a streaming ``text/plain`` response. Errors
        raised while streaming are sent in-band as ``"Error: ..."``.
    """
    file_path = request.form['file_path']

    file_text = get_file_text(file_path)
    if not file_text:
        return Response("Error processing file", status=400)

    def generate():
        try:
            prompt = f"请总结以下文本: {file_text}"
            messages = [{"role": "user", "content": prompt}]

            text = tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True
            )

            inputs = tokenizer(text, return_tensors="pt").to(device)

            # skip_prompt stops the streamer from echoing the whole prompt
            # (i.e. the full file text) before the summary;
            # skip_special_tokens keeps control tokens out of the output.
            streamer = TextIteratorStreamer(
                tokenizer,
                skip_prompt=True,
                skip_special_tokens=True,
            )

            generation_kwargs = dict(
                inputs,
                streamer=streamer,
                max_new_tokens=1024,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
            )

            # generate() blocks until finished, so it runs on a worker thread
            # while this generator drains the streamer.
            thread = threading.Thread(target=model.generate,
                                      kwargs=generation_kwargs)
            thread.start()

            # Forward each decoded chunk unchanged. The previous buffering
            # stripped whitespace from every chunk (gluing words together and
            # dropping line breaks) and rewrote every '.' as '. ', which
            # corrupted decimals, file names and similar text.
            for new_text in streamer:
                if new_text:
                    yield new_text

            thread.join()

        except Exception as e:
            yield f"Error: {str(e)}"

    return Response(stream_with_context(generate()), mimetype="text/plain")

if __name__ == '__main__':
    # run() already checks whether app.exe is running and does nothing in that
    # case, so no separate process scan is needed here.
    run()
    # Give the server a moment to start listening before opening the browser.
    threading.Timer(1.5, open_browser).start()
    # debug=True turns on the auto-reloader by default, which re-executes this
    # module in a child process: the model would be loaded a second time and a
    # second browser tab opened. Disabling only the reloader avoids that.
    # NOTE(security): debug mode also exposes the interactive debugger; turn
    # it off before distributing the application.
    app.run(debug=True, use_reloader=False)

summary.html
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>总结</title>
<link rel="icon" href="{{ url_for('static', filename='img/logo_icon.ico') }}"
type="image/x-icon">

<style>
    /* Reset: border-box sizing and no default spacing on any element. */
    * {
        box-sizing: border-box;
        margin: 0;
        padding: 0;
    }

    body {
        background: #e3f1fb;
        font-family: "Inter-Regular", sans-serif;
    }

    /* Centered page column. */
    .container {
        max-width: 1280px;
        margin: 0 auto;
        padding: 20px;
        display: flex;
        flex-direction: column;
        align-items: center;
    }

    .logo {
        width: 195px;
        margin-bottom: 20px;
    }

    /* Search row: text box and button side by side. */
    .search-container {
        display: flex;
        align-items: center;
        gap: 20px;
        margin-bottom: 30px;
    }

    .search-box {
        background: #ffffff;
        border-radius: 29px;
        width: 400px;
        height: 57px;
        padding: 10px 20px;
    }

    /* The search and open buttons share one pill-shaped appearance. */
    .search-btn,
    .open-btn {
        background: #3a7fff;
        border-radius: 29px;
        width: 143px;
        height: 57px;
        display: flex;
        align-items: center;
        justify-content: center;
        color: #ffffff;
        font-size: 20px;
        cursor: pointer;
    }

    /* White cards: file header and summary body. */
    .file-info {
        background: #ffffff;
        border-radius: 15px;
        width: 100%;
        padding: 20px;
        margin-bottom: 20px;
        display: flex;
        justify-content: space-between;
        align-items: center;
    }

    .summary-container {
        background: #ffffff;
        border-radius: 15px;
        width: 100%;
        min-height: 550px;
        padding: 20px;
    }

    /* Applies to elements carrying the "streaming-content" class;
       pre-wrap preserves the line breaks of the streamed text. */
    .streaming-content {
        white-space: pre-wrap;
        padding: 20px;
        border: 1px solid #ccc;
        margin: 20px;
    }
</style>
</head>
<body>
<div class="container">
    <img class="logo" src="{{ url_for('static', filename='img/logo.svg') }}" alt="Logo" />
    <!-- Search row: Enter in the box or a click on the button calls search(). -->
    <div class="search-container">
        <!-- The placeholder must stay on one line; a line break inside the
             attribute value becomes part of the displayed hint. -->
        <input type="text" class="search-box" id="searchBox" placeholder="搜索...">
        <div class="search-btn" onclick="search()">搜索</div>
    </div>
    <!-- File header: name on the left, "open" link on the right. -->
    <div class="file-info">
        <span>{{ file_name }}</span>
        <a class="open-btn" href="{{ url_for('open_file', file_path=file_path) }}">打开</a>
    </div>
    <div class="summary-container">
        <h3>总结:</h3>
        <!-- Target for the streamed summary. The id is what the script looks
             up; the class is what the stylesheet's .streaming-content rule
             (pre-wrap, border, spacing) matches. Kept empty so no stray
             whitespace is rendered ahead of the streamed text. -->
        <div id="streaming-content" class="streaming-content"></div>
    </div>
</div>
<script>
// Handle search functionality
document.getElementById('searchBox').addEventListener('keypress',
function(event) {
if (event.key === 'Enter') {
search();
}
});

function search() {
let query = document.getElementById('searchBox').value;
if (query.trim() === '') return;
window.location.href = `/search?q=${query}`;
}

const streamingContent = document.getElementById('streaming-content');


const filePath = "{{ file_path|replace('\\', '/') }}"; // Ensure proper file
path format

fetch('/stream-summary', {
method: 'POST',
body: new URLSearchParams({ file_path: filePath }),
headers: { 'Content-Type': 'application/x-www-form-urlencoded' }
})
.then(response => {
const reader = response.body.getReader();
function readStream() {
reader.read().then(({ done, value }) => {
if (done) return;
// Append the new chunk of text to the streaming content
streamingContent.textContent += new
TextDecoder().decode(value);
readStream(); // Continue reading the stream
});
}
readStream();
})
.catch(error => {
console.error("Error:", error);
streamingContent.innerHTML = `<div class="error">Error: $
{error.message}</div>`;
});
</script>
</body>
</html>

You might also like