import os
import re
import sqlite3
from collections import defaultdict

# ==== Configuration ====
DB_PATH = r"C:\Users\数据库\database.db"
OUTPUT_BASE = r"D:\导出"

# ==== Utility functions ====
INVALID_FILENAME_CHARS = r'[\\/:*?"<>|]'

# Compiled once at import time so the helpers below do not re-parse patterns.
_INVALID_FILENAME_RE = re.compile(INVALID_FILENAME_CHARS)
_BLANK_CHARS_RE = re.compile(r'[\s\u200b\u200c\u200d\uFEFF]+')

# Placeholder used when a name is missing or nothing usable is left of it.
_DEFAULT_FILENAME = "未命名"

# Encodings tried in order by ``to_text``. "utf-8-sig" comes first because it
# decodes plain UTF-8 as well and additionally strips a leading BOM.
# "latin-1" maps every byte to a code point, so it is the lossless last resort.
_TEXT_ENCODINGS = ("utf-8-sig", "gb18030", "latin-1")


def sanitize_filename(name: str) -> str:
    """Return *name* with characters that Windows forbids in file names replaced.

    Each character matched by ``INVALID_FILENAME_CHARS`` becomes ``_``.
    Surrounding whitespace and trailing dots/spaces are removed (Windows drops
    trailing dots and spaces itself, which could make distinct names collide).
    If *name* is falsy, or nothing is left after cleaning, the placeholder
    ``"未命名"`` is returned so the result is never an empty string.
    """
    text = str(name) if name else ""
    cleaned = _INVALID_FILENAME_RE.sub("_", text).strip().rstrip(". ")
    return cleaned or _DEFAULT_FILENAME


def ensure_unique_path(path: str) -> str:
    """Return *path*, or a numbered variant of it that does not exist yet.

    When *path* already exists, ``_1``, ``_2``, ... is inserted before the
    extension until a free name is found. The check is not atomic: another
    process may still create the returned path before the caller uses it.
    """
    stem, ext = os.path.splitext(path)
    candidate = path
    suffix = 1
    while os.path.exists(candidate):
        candidate = f"{stem}_{suffix}{ext}"
        suffix += 1
    return candidate


def to_text(content) -> str:
    """Convert a blob/text value to ``str`` without silently dropping data.

    ``None`` becomes ``""``. Bytes-like values are decoded *strictly* with each
    encoding in ``_TEXT_ENCODINGS`` in turn; the first encoding that decodes
    without error wins. Any other value is passed through ``str()``.
    """
    if content is None:
        return ""
    if isinstance(content, (bytes, bytearray, memoryview)):
        raw = bytes(content)
        for encoding in _TEXT_ENCODINGS:
            try:
                # Strict decoding is required here: with errors="ignore" the
                # first encoding never fails and the fallbacks are never tried.
                return raw.decode(encoding)
            except UnicodeDecodeError:
                continue
        # Not reachable while "latin-1" is in the list; kept as a safe default
        # in case the encoding tuple is edited later.
        return raw.decode("utf-8", errors="ignore")
    return str(content)


def clean_content(content: str) -> str:
    """Replace the configured placeholder substring; falsy input yields ``""``."""
    if not content:
        return ""
    return content.replace("1111111111", "222222222222")


def is_blank_content(content: str) -> bool:
    """Tell whether *content* has no meaningful text.

    Whitespace and zero-width characters (U+200B, U+200C, U+200D, U+FEFF) are
    removed first; the content counts as blank if nothing remains or if only
    the single character ``"0"`` remains.
    """
    if not content:
        return True
    stripped = _BLANK_CHARS_RE.sub("", content)
    return stripped in ("", "0")


# NOTE(review): the visible source is cut off in the middle of the YEAR_REGEX
# definition (it starts with: YEAR_REGEX = re.compile(r'(? ...). It was not
# reconstructed here; restore that definition and everything after it from the
# original file.