"""Python script for PDF_reading."""
import re
from collections import defaultdict
class PDFReader:
    """Extract the text of a PDF file and index it by chapter and topic.

    Attributes:
        file_path: Path of the PDF file, opened by ``extract_text``.
        chapters: Maps a chapter heading to the non-empty text lines found
            under it, in document order.
        topics: Maps a chapter heading to the topic headings found under it;
            ``sort_data`` sorts each of these lists in place.

    NOTE(review): ``store_chapters`` and ``answer_question`` were called but
    not defined; the versions below are simple text heuristics - confirm
    that they match the intended behaviour.
    """

    # Key used for text that appears before the first chapter heading.
    _FRONT_MATTER = 'Front matter'
    # A chapter heading is a line starting with "Chapter <number>".
    _CHAPTER_RE = re.compile(r'^chapter\s+\d+\b', re.IGNORECASE)
    # A topic heading is a line starting with a dotted number, e.g. "1.2 Scope".
    _TOPIC_RE = re.compile(r'^\d+(?:\.\d+)+\s+\S')
    _WORD_RE = re.compile(r'\w+')

    def __init__(self, file_path):
        """Remember ``file_path`` and start with empty chapter/topic indexes."""
        self.file_path = file_path
        self.chapters = defaultdict(list)
        self.topics = defaultdict(list)

    @staticmethod
    def _join_page_texts(page_texts) -> str:
        """Concatenate page texts, terminating each page with a newline.

        A falsy entry (``None`` or ``''``) contributes only its newline
        instead of raising ``TypeError`` on concatenation.
        """
        return ''.join(f"{page_text or ''}\n" for page_text in page_texts)

    def extract_text(self) -> str:
        """Return the text of every page of the PDF, one page after another.

        Raises:
            ImportError: If PyPDF2 is not installed.
            OSError: If ``self.file_path`` cannot be opened.
        """
        # Imported here because PyPDF2 is third-party and only this method
        # needs it; the rest of the class stays usable without it.
        import PyPDF2

        with open(self.file_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            # Joined inside the ``with`` block: pages are read lazily from
            # the open file handle.
            return self._join_page_texts(
                page.extract_text() for page in reader.pages
            )

    def store_chapters(self, text) -> None:
        """Split ``text`` into chapters and topics, replacing stored data.

        Blank lines are skipped and every other line is stripped. A line
        matching ``_CHAPTER_RE`` starts a new chapter (the heading itself is
        the key and is not stored as body text). Every other line is appended
        to the current chapter; lines matching ``_TOPIC_RE`` are additionally
        recorded as topics of that chapter. Lines before the first chapter
        heading are filed under ``_FRONT_MATTER``.
        """
        self.chapters.clear()
        self.topics.clear()
        current = self._FRONT_MATTER
        for raw_line in text.splitlines():
            line = raw_line.strip()
            if not line:
                continue
            if self._CHAPTER_RE.match(line):
                current = line
                # Register the chapter even if no body line follows it.
                self.chapters.setdefault(current, [])
                continue
            if self._TOPIC_RE.match(line):
                self.topics[current].append(line)
            self.chapters[current].append(line)

    def sort_data(self) -> None:
        """Sort the topic list of every chapter in place."""
        for topic_list in self.topics.values():
            topic_list.sort()

    def answer_question(self, question, max_results=5) -> list:
        """Return the stored lines that best match ``question``.

        Matching is a case-insensitive keyword overlap: words of the question
        longer than two characters are compared with the words of each stored
        line. Lines sharing no keyword are dropped; the rest are ordered by
        number of shared keywords (highest first), ties in document order.

        Args:
            question: Free-text question.
            max_results: Maximum number of lines to return.

        Returns:
            A list of at most ``max_results`` lines; empty when the question
            has no usable keyword or nothing matches.
        """
        keywords = {
            word
            for word in self._WORD_RE.findall(question.lower())
            if len(word) > 2
        }
        if not keywords:
            return []

        scored = []
        position = 0
        for lines in self.chapters.values():
            for line in lines:
                overlap = len(
                    keywords.intersection(self._WORD_RE.findall(line.lower()))
                )
                if overlap:
                    # Negative overlap so a plain sort puts best matches
                    # first; position keeps document order among ties.
                    scored.append((-overlap, position, line))
                position += 1
        scored.sort()
        return [line for _, _, line in scored[:max_results]]

    def process_pdf(self) -> None:
        """Extract the PDF text, index it by chapter, then sort the topics."""
        text = self.extract_text()
        self.store_chapters(text)
        self.sort_data()
# Example usage
def main():
    """Process the example PDF and print the answers to one example question."""
    pdf_reader = PDFReader('C:/Videos/363007BUSC01282_CDFE02_117.pdf')
    pdf_reader.process_pdf()
    # Example question
    question = 'lifting devices need to be re-certified'
    answers = pdf_reader.answer_question(question)
    for answer in answers:
        print(answer)


if __name__ == "__main__":
    main()