#!/usr/bin/env python3 import os, sys, opencc import re def process_urls(text): """处理 Markdown 中的相对 URL,在前面添加 /tw 前缀""" # 定义需要处理的页面路径(不带.md后缀) page_paths = [ '/ch1', '/ch2', '/ch3', '/ch4', '/ch5', '/ch6', '/ch7', '/ch8', '/ch9', '/ch10', '/ch11', '/ch12', '/part-i', '/part-ii', '/part-iii', '/preface', '/glossary', '/colophon' ] # 对每个页面路径进行替换 for page_path in page_paths: # 匹配 Markdown 链接格式 [text](page_path) 或 [text](page_path#anchor) pattern = rf'\[([^\]]*)\]\(({re.escape(page_path)})(#[^)]*)?\)' # 替换为添加 /tw 前缀的版本 def replace_func(match): text_part = match.group(1) path_part = match.group(2) anchor_part = match.group(3) or '' return f'[{text_part}](/tw{path_part}{anchor_part})' text = re.sub(pattern, replace_func, text) return text def convert(src_path, dst_path, cfg='s2twp.json'): converter = opencc.OpenCC(cfg) with open(src_path, "r", encoding='utf-8') as src, open(dst_path, "w+", encoding='utf-8') as dst: dst.write("\n".join( process_urls( converter.convert(line.rstrip()).replace('(img/', '(../img/') .replace('髮送', '傳送') .replace('髮布', '釋出') .replace('髮生', '發生') .replace('髮出', '發出') .replace('嚐試', '嘗試') .replace('線上性一致', '在線性一致') # 优先按"在线"解析了? .replace('復雜', '複雜') .replace('討論瞭', '討論了') .replace('倒黴', '倒楣') .replace('區域性性', '區域性') .replace('下麵條件', '下面條件') # 优先按"面条"解析了? .replace('當日志', '當日誌') # 优先按"当日"解析了,没有考虑后面的"日志"? .replace('真即時間', '真實時間') # 优先按"实时"解析了,没有考虑前面的"真实"? .replace('面向物件', '物件導向') ) for line in src)) print("convert %s to %s" % (src_path, dst_path)) if __name__ == '__main__': print(sys.argv) home = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), '..')) zh_dir = os.path.join(home, 'content', 'zh') tw_dir = os.path.join(home, 'content', 'tw') os.chdir(zh_dir) for f in os.listdir(zh_dir): if f.endswith('.md'): dst = os.path.join(tw_dir, f) convert(f, dst)