mirror of
https://github.com/Vonng/ddia.git
synced 2026-06-21 00:47:05 +08:00
Merge b1e0ad3245 into 900a2550dc
This commit is contained in:
commit
eba72e0351
8 changed files with 554 additions and 2 deletions
143
.github/workflows/build-pdf.yaml
vendored
Normal file
143
.github/workflows/build-pdf.yaml
vendored
Normal file
|
|
@ -0,0 +1,143 @@
|
|||
# Builds the Chinese PDF edition with pandoc + XeLaTeX.
# Runs on version/release tags and on manual dispatch; a GitHub release is
# only created or updated when the ref is a tag.
name: Build and Release PDF

on:
  push:
    tags:
      - 'v*'
      - 'release*'
  workflow_dispatch:

# Needed so the release step can create/update releases and upload assets.
permissions:
  contents: write

jobs:
  build-pdf:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          # Install pandoc
          wget -q https://github.com/jgm/pandoc/releases/download/3.1.11/pandoc-3.1.11-1-amd64.deb
          sudo dpkg -i pandoc-3.1.11-1-amd64.deb
          rm pandoc-3.1.11-1-amd64.deb

          # Install TeX Live with XeLaTeX and Chinese support
          sudo apt-get update
          sudo apt-get install -y \
            texlive-xetex \
            texlive-lang-chinese \
            texlive-fonts-recommended \
            texlive-fonts-extra \
            fonts-noto-cjk \
            fonts-noto-cjk-extra

          # Verify installations
          pandoc --version
          xelatex --version

      - name: Build PDF
        run: |
          chmod +x bin/pdf bin/preprocess-epub.py
          mkdir -p output output/temp

          # Preprocess Markdown files
          python3 bin/preprocess-epub.py content/zh output/temp

          # Generate PDF with CI-specific fonts (Noto CJK)
          pandoc -o output/ddia.pdf \
            --metadata-file=metadata.yaml \
            -H bin/header-ci.tex \
            --toc \
            --toc-depth=2 \
            --top-level-division=chapter \
            --file-scope=true \
            --pdf-engine=xelatex \
            -V geometry:margin=1in \
            -V linestretch=1.5 \
            output/temp/_index.md \
            output/temp/preface.md \
            output/temp/part-i.md \
            output/temp/ch1.md \
            output/temp/ch2.md \
            output/temp/ch3.md \
            output/temp/ch4.md \
            output/temp/part-ii.md \
            output/temp/ch5.md \
            output/temp/ch6.md \
            output/temp/ch7.md \
            output/temp/ch8.md \
            output/temp/ch9.md \
            output/temp/part-iii.md \
            output/temp/ch10.md \
            output/temp/ch11.md \
            output/temp/ch12.md \
            output/temp/ch13.md \
            output/temp/ch14.md \
            output/temp/colophon.md \
            output/temp/glossary.md

          rm -rf output/temp

          if [ ! -f "output/ddia.pdf" ]; then
            echo "Error: PDF file was not created"
            exit 1
          fi

          ls -lh output/ddia.pdf
          file output/ddia.pdf

      - name: Upload PDF artifact
        uses: actions/upload-artifact@v4
        with:
          name: ddia-pdf
          path: output/ddia.pdf
          retention-days: 30
          # Fail the step instead of only warning when the PDF is missing.
          if-no-files-found: error

      - name: Create/Update Release
        if: startsWith(github.ref, 'refs/tags/')
        # v2 is the current major version of this action; v1 is no longer updated.
        uses: softprops/action-gh-release@v2
        with:
          files: output/ddia.pdf
          name: ${{ github.ref_name }}
          body: |
            ## 《设计数据密集型应用》PDF 版本

            此版本为自动生成的 PDF 电子书。

            ### 文件信息
            - 文件名: `ddia.pdf`
            - 生成时间: ${{ github.event.head_commit.timestamp }}
            - 标签: ${{ github.ref_name }}

            ### 依赖工具
            - Pandoc 3.1.11
            - XeLaTeX (TeX Live)
            - 中文字体: Noto CJK

            ### 本地生成
            如需本地生成 PDF,请确保安装以下依赖:
            ```bash
            # macOS
            brew install pandoc
            brew install --cask mactex

            # Ubuntu/Debian
            sudo apt install pandoc texlive-xetex texlive-lang-chinese

            # 生成 PDF
            make pdf
            ```
          draft: false
          prerelease: false
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
5
.gitignore
vendored
5
.gitignore
vendored
|
|
@ -10,4 +10,7 @@ public/
|
|||
CLAUDE.md
|
||||
content/cn/
|
||||
zh.md
|
||||
en.md
|
||||
en.md
|
||||
.venv
|
||||
AGENTS.md
|
||||
|
||||
|
|
|
|||
5
Makefile
5
Makefile
|
|
@ -17,4 +17,7 @@ translate:
|
|||
epub:
|
||||
bin/epub
|
||||
|
||||
.PHONY: default doc translate
|
||||
pdf:
|
||||
bin/pdf
|
||||
|
||||
.PHONY: default doc translate epub pdf
|
||||
|
|
|
|||
13
README.md
13
README.md
|
|
@ -12,6 +12,19 @@
|
|||
|
||||
**阅读**:访问 [https://ddia.vonng.com](https://ddia.vonng.com) 阅读本书在线版本,或使用 [hugo](https://gohugo.io/documentation/) / [hextra](https://imfing.github.io/hextra/zh-cn/) 主题自行构建。
|
||||
|
||||
**下载**:可以使用以下命令生成 PDF 电子书:
|
||||
|
||||
```bash
|
||||
# 安装依赖
|
||||
brew install pandoc
brew install --cask mactex  # 提供 xelatex(PDF 引擎)
|
||||
brew install poppler # 用于 pdftotext
|
||||
|
||||
# 生成 PDF
|
||||
make pdf
|
||||
```
|
||||
|
||||
生成的 PDF 文件位于 `output/ddia.pdf`
|
||||
|
||||
> [!NOTE]
|
||||
> [**DDIA 第二版**](https://ddia.vonng.com) 正在翻译中(翻译至第十章),欢迎阅览并提出您的宝贵意见。
|
||||
|
||||
|
|
|
|||
31
bin/header-ci.tex
Normal file
31
bin/header-ci.tex
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
% Pandoc header include for CI builds (Noto CJK fonts).

% Chinese support with xeCJK
\usepackage{xeCJK}
\setCJKmainfont{Noto Serif CJK SC}
\setCJKsansfont{Noto Sans CJK SC}

% Enable Chinese line breaking
\XeTeXlinebreaklocale "zh"

% Paragraph settings
\usepackage{parskip}
\setlength{\parindent}{2em}
\usepackage{ragged2e}

% Chinese punctuation style
\punctstyle{quanjiao}

% Cover page
% Each line ends with \par inside its size group: without a paragraph break
% the four lines are typeset as one paragraph and the \vspace commands land
% in the middle of it. Keeping \par inside the group also makes the line
% spacing follow the enlarged font size.
\AtBeginDocument{%
  \thispagestyle{empty}
  \begin{center}
    \vspace*{0.4\textheight}
    {\Huge\bfseries 设计数据密集型应用\par}
    \vspace{1cm}
    {\LARGE 第二版\par}
    \vspace{2cm}
    {\Large Martin Kleppmann\par}
    \vspace{0.5cm}
    {\large 冯若航 译\par}
  \end{center}
  \clearpage
}
|
||||
31
bin/header.tex
Normal file
31
bin/header.tex
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
% Pandoc header include for local builds (macOS system fonts).

% Chinese support with xeCJK
\usepackage{xeCJK}
\setCJKmainfont{PingFang SC}
\setCJKsansfont{Heiti SC}

% Enable Chinese line breaking
\XeTeXlinebreaklocale "zh"

% Paragraph settings
\usepackage{parskip}
\setlength{\parindent}{2em}
\usepackage{ragged2e}

% Chinese punctuation style
\punctstyle{quanjiao}

% Cover page
% Each line ends with \par inside its size group: without a paragraph break
% the four lines are typeset as one paragraph and the \vspace commands land
% in the middle of it. Keeping \par inside the group also makes the line
% spacing follow the enlarged font size.
\AtBeginDocument{%
  \thispagestyle{empty}
  \begin{center}
    \vspace*{0.4\textheight}
    {\Huge\bfseries 设计数据密集型应用\par}
    \vspace{1cm}
    {\LARGE 第二版\par}
    \vspace{2cm}
    {\Large Martin Kleppmann\par}
    \vspace{0.5cm}
    {\large 冯若航 译\par}
  \end{center}
  \clearpage
}
|
||||
108
bin/pdf
Executable file
108
bin/pdf
Executable file
|
|
@ -0,0 +1,108 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
set -e
|
||||
|
||||
# Check for required dependencies
|
||||
# Verify that pandoc and at least one LaTeX engine (xelatex or lualatex) are
# on PATH. Prints the missing tools plus install hints and exits with
# status 1 when anything is missing.
check_dependencies() {
    local missing_deps=()

    command -v pandoc > /dev/null 2>&1 || missing_deps+=("pandoc")

    # Either engine satisfies the requirement; lualatex is only a fallback.
    if ! command -v xelatex > /dev/null 2>&1 && ! command -v lualatex > /dev/null 2>&1; then
        missing_deps+=("xelatex or lualatex (LaTeX engine)")
    fi

    # Nothing missing: return early.
    if [ ${#missing_deps[@]} -eq 0 ]; then
        return 0
    fi

    echo "Error: Missing required dependencies:"
    local dep
    for dep in "${missing_deps[@]}"; do
        echo " - $dep"
    done
    echo ""
    echo "Installation:"
    echo " macOS: brew install pandoc"
    echo " macOS: brew install --cask mactex"
    echo ""
    echo " Linux: apt install pandoc texlive-xetex"
    exit 1
}
|
||||
|
||||
check_dependencies

# Detect available PDF engine
# NOTE(review): header.tex loads xeCJK and sets \XeTeXlinebreaklocale, which
# look XeTeX-specific; confirm the lualatex fallback can build with it.
if command -v xelatex &> /dev/null; then
    PDF_ENGINE="xelatex"
elif command -v lualatex &> /dev/null; then
    PDF_ENGINE="lualatex"
else
    echo "Error: No suitable PDF engine found"
    exit 1
fi

# Resolve the script directory to an absolute path so later references to
# files next to this script do not depend on the caller's working directory.
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
INPUT_DIR=$(dirname "$SCRIPT_DIR")
OUTPUT_DIR="$INPUT_DIR/output"
TEMP_DIR="$OUTPUT_DIR/temp"

# Create output directory if it doesn't exist
mkdir -p "$OUTPUT_DIR"
mkdir -p "$TEMP_DIR"

# With `set -e` a failing preprocess or pandoc run aborts the script at once;
# the EXIT trap removes the temporary directory in that case as well.
trap 'rm -rf "$TEMP_DIR"' EXIT

# Preprocess Markdown files to convert Hugo shortcodes
echo "Preprocessing Markdown files..."
python3 "${SCRIPT_DIR}/preprocess-epub.py" "${INPUT_DIR}/content/zh" "$TEMP_DIR"
|
||||
|
||||
# Render the preprocessed Markdown files in $TEMP_DIR into one PDF at
# $OUTPUT_DIR/ddia.pdf, using $PDF_ENGINE and the custom LaTeX header.
convert_to_pdf() {
    OUTPUT_BOOK="$OUTPUT_DIR/ddia.pdf"
    rm -f "$OUTPUT_BOOK"
    echo "Converting all Markdown files into $OUTPUT_BOOK..."

    local meta_file="${INPUT_DIR}/metadata.yaml"
    local header_file="${SCRIPT_DIR}/header.tex"

    # Book order; each name maps to $TEMP_DIR/<name>.md.
    local sections=(
        _index preface
        part-i ch1 ch2 ch3 ch4
        part-ii ch5 ch6 ch7 ch8 ch9
        part-iii ch10 ch11 ch12 ch13 ch14
        colophon glossary
    )
    local inputs=()
    local section
    for section in "${sections[@]}"; do
        inputs+=("${TEMP_DIR}/${section}.md")
    done

    # Use xelatex for Chinese support with custom header
    local pandoc_opts=(
        -o "$OUTPUT_BOOK"
        --metadata-file="$meta_file"
        -H "$header_file"
        --toc
        --toc-depth=2
        --top-level-division=chapter
        --file-scope=true
        --pdf-engine="$PDF_ENGINE"
        -V geometry:margin=1in
        -V linestretch=1.5
    )
    pandoc "${pandoc_opts[@]}" "${inputs[@]}"

    echo "PDF book created at $OUTPUT_BOOK."
}

convert_to_pdf

# Clean up temporary files
rm -rf "$TEMP_DIR"
|
||||
220
bin/pdf.py
Executable file
220
bin/pdf.py
Executable file
|
|
@ -0,0 +1,220 @@
|
|||
#!/usr/bin/env python3
|
||||
"""PDF generation from Markdown using pandoc + LaTeX."""
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import subprocess
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from typing import Optional, List, Dict
|
||||
import shutil
|
||||
import importlib.util
|
||||
|
||||
|
||||
# Files in the order they are handed to pandoc: front matter, then each part
# page followed by its chapters, then the back matter.
CHAPTER_ORDER = [
    "_index.md",
    "preface.md",
    "part-i.md",
    *(f"ch{number}.md" for number in range(1, 5)),
    "part-ii.md",
    *(f"ch{number}.md" for number in range(5, 10)),
    "part-iii.md",
    *(f"ch{number}.md" for number in range(10, 15)),
    "colophon.md",
    "glossary.md",
]

# Font names keyed by pandoc variable name.
DEFAULT_FONTS = dict(mainfont="PingFang SC", sansfont="Heiti SC")

# Leading YAML front matter block; group 1 is the text between the fences.
YAML_FRONT_RE = re.compile(r'^---\n(.*?)\n---\n', flags=re.DOTALL)
# `title:` line inside the front matter; group 1 is the title text.
TITLE_RE = re.compile(r'^title:\s*["\']?([^"\'\n]+)["\']?\s*$', flags=re.MULTILINE)
# Leading "N. " numbering at the start of a title.
CHAPTER_NUM_RE = re.compile(r'^\d+\.\s*')
# Chapter file names such as "ch7.md"; group 1 is the chapter number.
CHAPTER_FILE_RE = re.compile(r'^ch(\d+)\.md$')
# Blockquote callout marker such as "> [!NOTE] "; group 1 is the callout type.
CALLOUT_RE = re.compile(r'^> \[!(NOTE|TIP|WARNING|CAUTION|DANGER)\] ', flags=re.MULTILINE)
|
||||
|
||||
|
||||
def convert_pdf_markdown(text: str, filename: str) -> str:
    """Apply the PDF-only Markdown rewrites to one document.

    Callout markers are converted first; then a heading derived from the
    front-matter title is inserted, with its level chosen from *filename*.
    """
    return _add_title_heading(_convert_callouts(text), filename)
|
||||
|
||||
|
||||
def _convert_callouts(text: str) -> str:
    """Replace ``> [!TYPE] `` callout markers with bold Chinese labels.

    After the markers are replaced, a leading ``>`` (and one optional space)
    is stripped from every line of the document.
    """
    labels = {
        'note': '注',
        'tip': '提示',
        'warning': '警告',
        'caution': '注意',
        'danger': '危险'
    }

    def _label_for(found):
        kind = found.group(1).lower()
        # Fall back to the raw type name if no translation is registered.
        return f"**{labels.get(kind, kind)}**: "

    labelled = CALLOUT_RE.sub(_label_for, text)
    return re.sub(r'^> ?', '', labelled, flags=re.MULTILINE)
|
||||
|
||||
|
||||
def _add_title_heading(text: str, filename: str) -> str:
    """Insert a Markdown heading built from the front-matter ``title``.

    The front matter is kept, and a heading with any leading "N. " numbering
    removed is placed between it and the body. Chapter files (``chN.md``) get
    a level-1 heading, every other file a level-2 heading. Text without front
    matter or without a title is returned unchanged.
    """
    front = YAML_FRONT_RE.match(text)
    if front is None:
        return text

    frontmatter = front.group(1)
    found_title = TITLE_RE.search(frontmatter)
    if found_title is None:
        return text

    remainder = text[front.end():]
    heading_text = CHAPTER_NUM_RE.sub('', found_title.group(1))
    marker = "#" if CHAPTER_FILE_RE.match(filename) else "##"
    return f"---\n{frontmatter}\n---\n\n{marker} {heading_text}\n\n{remainder}"
|
||||
|
||||
|
||||
def check_cmd(cmd: str) -> bool:
    """Return True when *cmd* resolves to an executable on ``PATH``.

    Uses ``shutil.which`` instead of spawning the external ``which`` program,
    so the check also works where that program is not installed.
    """
    return shutil.which(cmd) is not None
|
||||
|
||||
|
||||
def get_available_engine() -> Optional[str]:
    """Return the first installed LaTeX engine, preferring xelatex.

    Returns ``None`` when neither xelatex nor lualatex is found.
    """
    for candidate in ("xelatex", "lualatex"):
        if check_cmd(candidate):
            return candidate
    return None
|
||||
|
||||
|
||||
def check_dependencies() -> List[str]:
    """Return descriptions of the required tools that are not installed.

    The list is empty when pandoc and at least one LaTeX engine are found.
    """
    problems = []
    pandoc_found = check_cmd("pandoc")
    if not pandoc_found:
        problems.append("pandoc")
    if get_available_engine() is None:
        problems.append("xelatex or lualatex (LaTeX engine)")
    return problems
|
||||
|
||||
|
||||
def preprocess_markdown(input_dir: Path, output_dir: Path) -> None:
    """Run every ``*.md`` file in *input_dir* through both preprocessing passes.

    The sibling script ``preprocess-epub.py`` is loaded from its file path and
    its ``process_file`` is called on each Markdown file. The result is then
    passed through ``convert_pdf_markdown`` and written to *output_dir* under
    the original file name.

    Raises:
        RuntimeError: if the import spec or its loader cannot be obtained.
    """
    helper_path = Path(__file__).parent / "preprocess-epub.py"
    spec = importlib.util.spec_from_file_location("preprocess_epub", helper_path)
    if spec is None:
        raise RuntimeError("Failed to load preprocess module")
    preprocess = importlib.util.module_from_spec(spec)
    if spec.loader is None:
        raise RuntimeError("Failed to load preprocess module loader")
    spec.loader.exec_module(preprocess)

    output_dir.mkdir(parents=True, exist_ok=True)
    sources = sorted(input_dir.glob("*.md"))
    print(f"Preprocessing {len(sources)} files...")

    # process_file writes to a path, so each file goes through a scratch file
    # that is read back and removed before the next one.
    scratch = output_dir / "tmp_preprocess.md"
    for source in sources:
        preprocess.process_file(str(source), str(scratch))
        converted = convert_pdf_markdown(
            scratch.read_text(encoding='utf-8'), source.name
        )
        (output_dir / source.name).write_text(converted, encoding='utf-8')
        scratch.unlink()
|
||||
|
||||
|
||||
def generate_pdf(
    temp_dir: Path,
    output_file: Path,
    metadata_file: Optional[str],
    engine: str,
    fonts: Dict[str, str],
    margin: str = "1in",
) -> None:
    """Render the preprocessed chapters in *temp_dir* into one PDF via pandoc.

    Args:
        temp_dir: Directory holding the preprocessed Markdown files.
        output_file: Path of the PDF to write.
        metadata_file: Optional pandoc metadata YAML file; the
            ``--metadata-file`` option is omitted when this is empty.
        engine: Value passed to pandoc's ``--pdf-engine``.
        fonts: Font names keyed by pandoc variable; ``mainfont`` is used,
            falling back to "PingFang SC" when it is absent.
        margin: Page margin passed as ``geometry:margin``.

    Raises:
        ValueError: if none of the files in ``CHAPTER_ORDER`` exist.
        RuntimeError: if pandoc exits with a non-zero status.
    """
    chapters = [str(temp_dir / ch) for ch in CHAPTER_ORDER if (temp_dir / ch).exists()]

    if not chapters:
        raise ValueError("No valid chapter files found")

    header_file = Path(__file__).parent / "header.tex"
    mainfont = (fonts or {}).get("mainfont", "PingFang SC")

    cmd = ["pandoc", "-o", str(output_file)]
    # Add the option and its value together: a lone "--metadata-file" would
    # otherwise consume the following "-H" as its argument.
    if metadata_file:
        cmd += ["--metadata-file", metadata_file]
    cmd += [
        "-H", str(header_file),
        "--toc",
        "--toc-depth=2",
        "--top-level-division=chapter",
        "--file-scope",
        f"--pdf-engine={engine}",
    ]

    # No shell splits these arguments, so "-V" and its KEY=VAL must be
    # separate argv entries; fused as "-V key=val" the value reaches pandoc
    # with a leading space.
    variables = [
        f"geometry:margin={margin}",
        "linestretch=1.5",
        "book=true",
        "classoption=openany",
        f"mainfont={mainfont}",
    ]
    for variable in variables:
        cmd += ["-V", variable]

    cmd.extend(chapters)

    print(f"Generating PDF with {engine}...")
    result = subprocess.run(cmd, capture_output=True, text=True)

    if result.returncode != 0:
        raise RuntimeError(f"PDF generation failed: {result.stderr}")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: preprocess the Markdown and build the PDF.

    Input and output directories are resolved relative to the project root
    (the parent of this script's directory). Exits with status 1 when a
    required tool or the requested PDF engine is not installed.
    """
    parser = argparse.ArgumentParser(description="Generate PDF from Markdown")
    parser.add_argument("-i", "--input", default="content/zh", help="Input directory")
    parser.add_argument("-o", "--output", default="output", help="Output directory")
    parser.add_argument("-m", "--metadata", help="Metadata YAML file")
    parser.add_argument("-e", "--engine", choices=["xelatex", "lualatex"], help="PDF engine")
    parser.add_argument("--no-cleanup", action="store_true", help="Keep temp files")
    args = parser.parse_args()

    project_root = Path(__file__).parent.parent
    input_dir = project_root / args.input
    output_dir = project_root / args.output
    temp_dir = output_dir / "temp"

    missing = check_dependencies()
    if missing:
        print("Error: Missing dependencies:", file=sys.stderr)
        for dep in missing:
            print(f" - {dep}", file=sys.stderr)
        print("\nInstall: brew install pandoc && brew install --cask mactex", file=sys.stderr)
        sys.exit(1)

    # check_dependencies only proves that *some* engine exists; an explicitly
    # requested one still has to be verified.
    if args.engine and not check_cmd(args.engine):
        print(f"Error: Requested PDF engine not found: {args.engine}", file=sys.stderr)
        sys.exit(1)

    engine = args.engine or get_available_engine()
    if engine is None:
        print("Error: No PDF engine available", file=sys.stderr)
        sys.exit(1)

    metadata = args.metadata or str(project_root / "metadata.yaml")

    output_dir.mkdir(parents=True, exist_ok=True)
    output_file = output_dir / "ddia.pdf"
    output_file.unlink(missing_ok=True)

    try:
        preprocess_markdown(input_dir, temp_dir)
        generate_pdf(temp_dir, output_file, metadata, engine, DEFAULT_FONTS)
    finally:
        # Remove intermediate files even when a step fails, unless the user
        # asked to keep them.
        if not args.no_cleanup and temp_dir.exists():
            shutil.rmtree(temp_dir)

    print(f"PDF created: {output_file}")


if __name__ == "__main__":
    main()
|
||||
Loading…
Reference in a new issue