Reference: 忽略脚本和缓存文件，移除extract_pdf.py跟踪

2026-03-23 14:07:18 +08:00
parent d1a9c27b26
commit 0d1f32c75c
2 changed files with 10 additions and 29 deletions
@@ -1 +1,11 @@
 EVT/
 # Script files
 *.py
 *.sh
 *.bat
 # Cache files
 __pycache__/
 *.pyc
 *.cache
@@ -1,29 +0,0 @@
 import pdfplumber
 import sys
 def extract_pdf_text(pdf_path, output_path) -> bool:
    """Extract the text of every page of a PDF into a UTF-8 text file.

    Each page for which ``page.extract_text()`` returns a non-empty value is
    emitted as a ``=== Page N ===`` header (1-based page number) followed by
    the page text and a blank line. Pages that yield no text are skipped.
    The output file is opened only after all pages have been processed.

    Args:
        pdf_path: Location of the PDF, passed straight to ``pdfplumber.open``.
        output_path: File opened in ``"w"`` mode (created or overwritten)
            to receive the extracted text.

    Returns:
        True when extraction and writing completed; False when any
        exception was raised, in which case the error is printed instead
        of being propagated.
    """
    try:
        with pdfplumber.open(pdf_path) as pdf:
            # Collect fragments and join once instead of growing a string
            # with repeated ``+=`` inside the loop.
            fragments = []
            for page_number, page in enumerate(pdf.pages, start=1):
                page_text = page.extract_text()
                if page_text:
                    fragments.append(f"=== Page {page_number} ===\n")
                    fragments.append(page_text + "\n\n")
            with open(output_path, "w", encoding="utf-8") as f:
                f.write("".join(fragments))
            # Reports the total page count, including pages without text.
            print(f"Extracted {len(pdf.pages)} pages from {pdf_path}")
            return True
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and signal it
        # to the caller through the boolean result.
        print(f"Error extracting {pdf_path}: {e}")
        return False
 if __name__ == "__main__":
    # Extract each datasheet PDF into a text file; both paths are relative,
    # so they resolve against the current working directory.
    for source_pdf, text_out in (
        ("CH390DS1.PDF", "CH390DS1.txt"),  # CH390 datasheet
        ("stm32f103c8.pdf", "stm32f103c8.txt"),  # STM32F103C8 datasheet
    ):
        extract_pdf_text(source_pdf, text_out)