Reference: 忽略脚本和缓存文件,移除extract_pdf.py跟踪
This commit is contained in:
@@ -1 +1,11 @@
|
|||||||
EVT/
|
EVT/
|
||||||
|
|
||||||
|
# Script files
|
||||||
|
*.py
|
||||||
|
*.sh
|
||||||
|
*.bat
|
||||||
|
|
||||||
|
# Cache files
|
||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
|
*.cache
|
||||||
|
|||||||
@@ -1,29 +0,0 @@
|
|||||||
import pdfplumber
|
|
||||||
import sys
|
|
||||||
|
|
||||||
|
|
||||||
def extract_pdf_text(pdf_path, output_path):
    """Extract the text of every page of a PDF into a UTF-8 text file.

    Each page that yields text is written as a ``=== Page N ===`` header
    line, followed by the page text and a blank line. Pages for which
    ``extract_text()`` returns nothing are skipped.

    Args:
        pdf_path: Path of the PDF to read (passed to ``pdfplumber.open``).
        output_path: Path of the text file to create or overwrite.

    Returns:
        True on success. False if any step raised an exception; the error
        is printed rather than re-raised.
    """
    try:
        sections = []
        with pdfplumber.open(pdf_path) as pdf:
            # Capture the count while the document is open so the summary
            # below does not depend on the handle after it is closed.
            page_count = len(pdf.pages)
            for page_number, page in enumerate(pdf.pages, start=1):
                page_text = page.extract_text()
                if page_text:
                    sections.append(
                        f"=== Page {page_number} ===\n{page_text}\n\n"
                    )

        # Join once instead of growing a string with += on every page.
        with open(output_path, "w", encoding="utf-8") as f:
            f.write("".join(sections))

        print(f"Extracted {page_count} pages from {pdf_path}")
        return True
    except Exception as e:
        # Broad catch is deliberate: callers rely on the boolean result
        # instead of handling exceptions themselves.
        print(f"Error extracting {pdf_path}: {e}")
        return False
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Datasheets to convert, as (source PDF, destination text file) pairs:
    # the CH390 datasheet first, then the STM32F103C8 datasheet.
    datasheet_jobs = (
        ("CH390DS1.PDF", "CH390DS1.txt"),
        ("stm32f103c8.pdf", "stm32f103c8.txt"),
    )
    for source_pdf, text_file in datasheet_jobs:
        extract_pdf_text(source_pdf, text_file)
|
|
||||||
Reference in New Issue
Block a user