~cytrogen/kobo-manga (4e504823f4bf8d2b5f4279da3f4d4ebe98fc97ad): src/kobo_manga/converter/kepub.py

"""KEPUB 打包器

将处理后的图片打包为 .kepub.epub 文件，可直接在 Kobo 设备上阅读。
"""

import uuid as uuid_mod
import zipfile
from datetime import datetime, timezone
from pathlib import Path

from kobo_manga.config import DeviceConfig
from kobo_manga.converter.templates import (
    CONTAINER_XML,
    MIMETYPE,
    STYLE_CSS,
    content_opf,
    nav_xhtml,
    page_xhtml,
    toc_ncx,
)
from kobo_manga.models import Chapter, MangaInfo


class KepubBuilder:
    """Package one manga chapter's page images as a KEPUB file.

    An instance holds the manga and chapter metadata plus the target
    device configuration, and generates a random UUID at construction
    time that is passed to the OPF and NCX templates as the book
    identifier.
    """

    def __init__(
        self, manga: MangaInfo, chapter: Chapter, device: DeviceConfig
    ):
        """Store the book metadata and generate the book identifier.

        Args:
            manga: Series metadata; ``title``, ``author`` and
                ``description`` are read by ``build``.
            chapter: Chapter metadata; ``id``, ``title`` and
                ``chapter_number`` are read by ``build``.
            device: Target device; ``width`` and ``height`` are used as
                the page viewport.
        """
        self.manga = manga
        self.chapter = chapter
        self.device = device
        self.uuid = str(uuid_mod.uuid4())

    def build(
        self,
        image_paths: list[Path],
        output_dir: Path,
        cover_path: Path | None = None,
    ) -> Path:
        """Build a ``.kepub.epub`` file from processed page images.

        The archive is assembled under a temporary name inside
        ``output_dir`` and renamed to its final name only after every
        entry has been written, so a failure part-way through (for
        example an unreadable image) neither leaves an incomplete book
        at the output path nor destroys a previously built one.

        Args:
            image_paths: Page image files, already sorted by page number.
            output_dir: Directory to write into; created if missing.
            cover_path: Cover image path. When ``None`` the first page
                image is used as the cover.

        Returns:
            Path of the generated ``.kepub.epub`` file.

        Raises:
            OSError: If an image cannot be read or the archive cannot be
                written. The temporary file is removed before the error
                propagates.
        """
        output_dir.mkdir(parents=True, exist_ok=True)

        # The chapter id is part of the file name so that chapters
        # sharing the same title do not overwrite each other.
        safe_title = _sanitize_filename(self.manga.title)
        safe_chapter = _sanitize_filename(self.chapter.title)
        filename = f"{safe_title} - {self.chapter.id} {safe_chapter}.kepub.epub"
        output_path = output_dir / filename
        # Fixed-length temporary name: cannot collide with a book file
        # and does not lengthen an already long title-based name.
        tmp_path = output_dir / f"{self.uuid}.tmp"

        # Fall back to the first page when no explicit cover is given.
        if cover_path is None and image_paths:
            cover_path = image_paths[0]

        try:
            with zipfile.ZipFile(tmp_path, "w") as zf:
                self._write_book(zf, image_paths, cover_path)
            tmp_path.replace(output_path)
        except BaseException:
            # Clean up on any failure (including interruption), then let
            # the original error propagate unchanged.
            tmp_path.unlink(missing_ok=True)
            raise

        return output_path

    def _write_book(
        self,
        zf: zipfile.ZipFile,
        image_paths: list[Path],
        cover_path: Path | None,
    ) -> None:
        """Write every archive entry of the book into ``zf``."""
        # 1. mimetype must be the first entry and must not be compressed.
        self._write_stored(zf, "mimetype", MIMETYPE)

        # 2. META-INF/container.xml
        zf.writestr("META-INF/container.xml", CONTAINER_XML)

        # 3. Stylesheet
        zf.writestr("OEBPS/style.css", STYLE_CSS)

        # 4. Images (cover first, then the pages in reading order).
        image_filenames: list[str] = []
        if cover_path:
            self._write_stored(
                zf, "OEBPS/Images/cover.jpg", cover_path.read_bytes()
            )
            image_filenames.append("cover.jpg")

        for page_no, img_path in enumerate(image_paths, 1):
            img_name = f"page_{page_no:03d}.jpg"
            self._write_stored(
                zf, f"OEBPS/Images/{img_name}", img_path.read_bytes()
            )
            image_filenames.append(img_name)

        # 5. One XHTML wrapper per image; page_ids records
        #    (manifest id, archive path) in spine order.
        page_ids: list[tuple[str, str]] = []
        vw = self.device.width
        vh = self.device.height

        if cover_path:
            cover_xhtml = page_xhtml(
                page_num=0,
                image_filename="cover.jpg",
                viewport_w=vw,
                viewport_h=vh,
            )
            zf.writestr("OEBPS/Text/cover.xhtml", cover_xhtml)
            page_ids.append(("cover", "OEBPS/Text/cover.xhtml"))

        for page_no in range(1, len(image_paths) + 1):
            stem = f"page_{page_no:03d}"
            href = f"OEBPS/Text/{stem}.xhtml"
            p_xhtml = page_xhtml(
                page_num=page_no,
                image_filename=f"{stem}.jpg",
                viewport_w=vw,
                viewport_h=vh,
            )
            zf.writestr(href, p_xhtml)
            page_ids.append((stem, href))

        # 6. content.opf
        manifest_items = self._build_manifest(
            image_filenames, page_ids, cover_path is not None
        )
        spine_items = self._build_spine(page_ids)

        title = f"{self.manga.title} - {self.chapter.title}"
        modified = datetime.now(timezone.utc).strftime(
            "%Y-%m-%dT%H:%M:%SZ"
        )

        opf = content_opf(
            uuid=self.uuid,
            title=title,
            author=self.manga.author,
            language="zh",
            description=self.manga.description,
            series_name=self.manga.title,
            series_index=self.chapter.chapter_number,
            modified=modified,
            manifest_items=manifest_items,
            spine_items=spine_items,
            viewport_w=vw,
            viewport_h=vh,
        )
        zf.writestr("OEBPS/content.opf", opf)

        # 7. toc.ncx
        nav_points = self._build_ncx_nav_points(page_ids)
        ncx = toc_ncx(uuid=self.uuid, title=title, nav_points=nav_points)
        zf.writestr("OEBPS/toc.ncx", ncx)

        # 8. nav.xhtml
        nav_items = self._build_nav_items(page_ids)
        nav = nav_xhtml(title=title, nav_items=nav_items)
        zf.writestr("OEBPS/nav.xhtml", nav)

    @staticmethod
    def _write_stored(
        zf: zipfile.ZipFile, arcname: str, data: str | bytes
    ) -> None:
        """Write ``data`` to ``arcname`` as an explicitly uncompressed entry."""
        info = zipfile.ZipInfo(arcname)
        info.compress_type = zipfile.ZIP_STORED
        zf.writestr(info, data)

    @staticmethod
    def _nav_entries(
        page_ids: list[tuple[str, str]],
    ) -> list[tuple[int, str, str]]:
        """Return ``(order, label, href)`` for every spine entry.

        ``order`` is the 1-based position in ``page_ids``. Page labels
        are numbered independently of it, so the cover entry does not
        shift them: the entry after the cover is "Page 1", not "Page 2".
        ``href`` is the entry path relative to the OEBPS directory.
        """
        entries = []
        page_no = 0
        for order, (page_id, page_href) in enumerate(page_ids, 1):
            if page_id == "cover":
                label = "Cover"
            else:
                page_no += 1
                label = f"Page {page_no}"
            entries.append((order, label, page_href.removeprefix("OEBPS/")))
        return entries

    def _build_manifest(
        self,
        image_filenames: list[str],
        page_ids: list[tuple[str, str]],
        has_cover: bool,
    ) -> str:
        """Build the ``<item>`` lines of the OPF manifest.

        Args:
            image_filenames: Image file names inside ``OEBPS/Images``.
            page_ids: ``(manifest id, archive path)`` pairs of the XHTML
                pages.
            has_cover: Currently unused; the cover is recognised by the
                file name ``cover.jpg``. Kept so existing callers keep
                working.

        Returns:
            The manifest items joined with newlines.
        """
        lines = []

        # Image items; the manifest id is the file name with dots
        # replaced by underscores.
        for img_name in image_filenames:
            img_id = img_name.replace(".", "_")
            props = (
                ' properties="cover-image"' if img_name == "cover.jpg" else ""
            )
            lines.append(
                f'    <item id="{img_id}" href="Images/{img_name}" '
                f'media-type="image/jpeg"{props}/>'
            )

        # XHTML page items; hrefs are relative to the OEBPS directory.
        for page_id, page_href in page_ids:
            rel_href = page_href.removeprefix("OEBPS/")
            lines.append(
                f'    <item id="{page_id}" href="{rel_href}" '
                f'media-type="application/xhtml+xml"/>'
            )

        return "\n".join(lines)

    def _build_spine(self, page_ids: list[tuple[str, str]]) -> str:
        """Build the ``<itemref>`` lines of the spine (reading order)."""
        return "\n".join(
            f'    <itemref idref="{page_id}"/>' for page_id, _ in page_ids
        )

    def _build_ncx_nav_points(
        self, page_ids: list[tuple[str, str]]
    ) -> str:
        """Build the NCX ``<navPoint>`` entries, one per spine entry.

        The navPoint id and ``playOrder`` follow the spine position; the
        visible label follows the page number.
        """
        return "\n".join(
            f'    <navPoint id="navPoint-{order}" playOrder="{order}">\n'
            f"      <navLabel><text>{label}</text></navLabel>\n"
            f'      <content src="{rel_href}"/>\n'
            f"    </navPoint>"
            for order, label, rel_href in self._nav_entries(page_ids)
        )

    def _build_nav_items(self, page_ids: list[tuple[str, str]]) -> str:
        """Build the ``<li>`` entries of the EPUB3 navigation document."""
        return "\n".join(
            f'      <li><a href="{rel_href}">{label}</a></li>'
            for _, label, rel_href in self._nav_entries(page_ids)
        )


def _sanitize_filename(name: str) -> str:
    """清理文件名中的非法字符。"""
    return "".join(
        c if c.isalnum() or c in " _-()（）【】" else "_" for c in name
    )