#!/usr/bin/env python3
"""
Preprocess Markdown files, converting Hugo shortcodes into a format that
Pandoc understands.

Two shortcode forms are handled:
1. {{< figure src="/fig/xxx.png" caption="xxx" >}} → ![xxx](static/fig/xxx.png)
2. {{< figure ... >}} (no src) → removed (typically used in code examples)
"""

import os
import re
import sys
from pathlib import Path

FIGURE_SHORTCODE_RE = re.compile(r"\{\{<\s*figure\b(.*?)>\}\}", re.DOTALL)
ATTR_RE = re.compile(r'([\w-]+)="([^"]*)"')
ABS_IMAGE_RE = re.compile(r'!\[([^\]]*)\]\(/(?!static/)([^)]+)\)')
FRONT_MATTER_RE = re.compile(r"\A---\s*\n(.*?)\n---\s*\n?", re.DOTALL)
TITLE_RE = re.compile(r'^title:\s*(?:"([^"]*)"|\'([^\']*)\'|(.+?))\s*$', re.MULTILINE)
LINK_HEADING_RE = re.compile(r"^(#{2,6})\s+(\[[^\]]+\]\([^)]+\))\s*$", re.MULTILINE)
HEADER_ID_RE = re.compile(r"\{#([A-Za-z0-9_:-]+)\}")
# NOTE(review): the next two patterns are reproduced exactly as they appear in
# the reviewed source, but their first group looks like it lost an opening
# "<...[^>" fragment during extraction -- confirm against the original file.
RAW_ID_RE = re.compile(r'(]*\bid=")([^"]+)(")', re.IGNORECASE)
RAW_HREF_RE = re.compile(r'(]*\bhref=")(/[^"#?)]*)(#[^"]*)?(")', re.IGNORECASE)

# NOTE(review): the reviewed source is truncated from the MD_HREF_RE pattern
# up to the end of process_file(). MD_HREF_RE, _slug_for_path(), process_file()
# and any other definitions that lived in that span must be restored here from
# the original file; main() below calls process_file() and _slug_for_path().


def main():
    """Command-line entry point: preprocess one Markdown file or a directory.

    Reads its arguments from ``sys.argv``:

    * ``preprocess.py <input_file> [output_file]`` -- process a single file.
      When ``output_file`` is omitted the input path is also used as the
      output path.
    * ``preprocess.py <input_dir> <output_dir>`` -- process every ``*.md``
      file found directly inside ``input_dir`` (no recursion), writing each
      result to a file of the same name inside ``output_dir``. The slugs of
      all those files are passed to ``process_file`` as the known pages.

    Exits with status 1 when no input is given, when a directory is given
    without an output directory, or when the input path is neither a file
    nor a directory.
    """
    usage_lines = (
        "Usage: preprocess.py <input_file> [output_file]",
        "   or: preprocess.py <input_dir> <output_dir>",
    )

    if len(sys.argv) < 2:
        for line in usage_lines:
            print(line)
        sys.exit(1)

    input_path = sys.argv[1]
    # None means "not supplied"; an explicitly passed value is used verbatim.
    output_arg = sys.argv[2] if len(sys.argv) > 2 else None

    if os.path.isfile(input_path):
        # Single-file mode: fall back to the input path as the output path.
        output_path = input_path if output_arg is None else output_arg
        process_file(input_path, output_path)
    elif os.path.isdir(input_path):
        # Directory mode: the output directory is mandatory. Reading
        # sys.argv[2] unconditionally used to raise IndexError here.
        if output_arg is None:
            print("Error: an output directory is required when the input is a directory")
            for line in usage_lines:
                print(line)
            sys.exit(1)

        output_dir = output_arg
        if output_dir:
            # Make sure the destination exists before any file is written.
            os.makedirs(output_dir, exist_ok=True)

        # Collect every .md file directly inside the input directory.
        md_files = sorted(Path(input_path).glob('*.md'))
        known_pages = {_slug_for_path(path) for path in md_files}

        for md_file in md_files:
            output_file = os.path.join(output_dir, md_file.name)
            process_file(str(md_file), output_file, known_pages)

        print(f"\nTotal processed: {len(md_files)} files")
    else:
        print(f"Error: {input_path} is not a valid file or directory")
        sys.exit(1)


if __name__ == '__main__':
    main()