# File: TCP2UART/Reference/extract_pdf.py (Python, 30 lines, 879 B)

import pdfplumber
import sys
def extract_pdf_text(pdf_path, output_path) -> bool:
    """Extract the text of a PDF into a UTF-8 text file.

    Each page that yields text is written as a ``=== Page N ===`` header
    line followed by the page text and a blank line. Pages for which
    ``extract_text()`` returns nothing are skipped, but page numbers still
    reflect the page's position in the PDF.

    Args:
        pdf_path: Path of the PDF to read (passed to ``pdfplumber.open``).
        output_path: Path of the text file to create or overwrite.

    Returns:
        True if the text file was written, False if any step failed. On
        failure the error is reported on stderr and no exception propagates.
    """
    try:
        with pdfplumber.open(pdf_path) as pdf:
            # Collect per-page sections and join once instead of growing a
            # string with repeated "+=".
            sections = []
            for page_number, page in enumerate(pdf.pages, start=1):
                page_text = page.extract_text()
                if page_text:
                    sections.append(
                        f"=== Page {page_number} ===\n{page_text}\n\n"
                    )
            # The reported count is the PDF's total page count, including
            # pages that produced no text.
            page_count = len(pdf.pages)

            with open(output_path, "w", encoding="utf-8") as f:
                f.write("".join(sections))

        print(f"Extracted {page_count} pages from {pdf_path}")
        return True
    except Exception as e:
        # Deliberately broad: this is the script's error boundary, and the
        # caller only needs a success/failure flag. Diagnostics go to stderr
        # so they do not mix with normal output.
        print(f"Error extracting {pdf_path}: {e}", file=sys.stderr)
        return False
if __name__ == "__main__":
    # Datasheets to convert, as (source PDF, output text file) pairs.
    # Paths are relative to the current working directory.
    datasheets = (
        ("CH390DS1.PDF", "CH390DS1.txt"),  # CH390 datasheet
        ("stm32f103c8.pdf", "stm32f103c8.txt"),  # STM32F103C8 datasheet
    )

    # Attempt every extraction even if an earlier one fails, then report
    # any failure through the exit status instead of discarding the results.
    results = [extract_pdf_text(src, dst) for src, dst in datasheets]
    if not all(results):
        sys.exit(1)