# TCP2UART/Reference/extract_pdf.py

import pdfplumber
import sys


def extract_pdf_text(pdf_path, output_path) -> bool:
    """Extract the text of a PDF, page by page, into a UTF-8 text file.

    Every page for which ``extract_text()`` returns non-empty text is written
    as a ``=== Page N ===`` header (1-based page number), the page text and a
    blank line. Pages that yield no text are skipped.

    Args:
        pdf_path: PDF to read; passed to ``pdfplumber.open``.
        output_path: Text file to create or overwrite.

    Returns:
        True on success. False if any exception was raised while reading the
        PDF or writing the output; the error is reported on stderr.
    """
    try:
        # Collect per-page chunks and join once instead of growing a string.
        chunks = []
        with pdfplumber.open(pdf_path) as pdf:
            total_pages = len(pdf.pages)
            for page_number, page in enumerate(pdf.pages, start=1):
                page_text = page.extract_text()
                if page_text:
                    chunks.append(
                        f"=== Page {page_number} ===\n{page_text}\n\n"
                    )

        # The PDF is already closed here; only the collected text is needed.
        with open(output_path, "w", encoding="utf-8") as f:
            f.write("".join(chunks))

        # Report both counts: pages without text are skipped, so the number
        # of pages written can be lower than the number of pages in the PDF.
        print(
            f"Extracted text from {len(chunks)} of {total_pages} pages "
            f"of {pdf_path}"
        )
        return True
    except Exception as e:
        # Script-level boundary: report the failure and let the caller decide.
        print(f"Error extracting {pdf_path}: {e}", file=sys.stderr)
        return False


if __name__ == "__main__":
    # (PDF to read, text file to write) pairs; the paths are relative, so
    # they resolve against the current working directory.
    datasheets = (
        ("CH390DS1.PDF", "CH390DS1.txt"),  # CH390 datasheet
        ("stm32f103c8.pdf", "stm32f103c8.txt"),  # STM32F103C8 datasheet
    )

    # Build a list (not a lazy generator) so every datasheet is attempted
    # even when an earlier one fails.
    results = [extract_pdf_text(src, dst) for src, dst in datasheets]

    # Surface failures to the shell instead of always exiting with status 0.
    if not all(results):
        sys.exit(1)