~cytrogen/kobo-manga (4e504823f4bf8d2b5f4279da3f4d4ebe98fc97ad): src/kobo_manga/downloader/basic.py

"""基础图片下载器"""

import asyncio
from pathlib import Path
from urllib.parse import urlsplit

import httpx

from kobo_manga.config import AppConfig
from kobo_manga.models import PageImage

# Desktop Chrome User-Agent string, assembled from its three product tokens.
_DESKTOP_CHROME_UA = " ".join(
    (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "AppleWebKit/537.36 (KHTML, like Gecko)",
        "Chrome/120.0.0.0 Safari/537.36",
    )
)

# Default request headers handed to the HTTP client.
HEADERS = {"User-Agent": _DESKTOP_CHROME_UA}


async def download_image(
    client: httpx.AsyncClient,
    image: PageImage,
    output_dir: Path,
    referer: str,
    max_retry: int = 3,
) -> PageImage:
    """Download a single page image and record where it was saved.

    The target file is ``<output_dir>/<page_number:03d><ext>``, where the
    extension comes from the URL path and falls back to ``.jpg``. A non-empty
    file already at that location is reused without any network request.

    Args:
        client: HTTP client used to issue the GET request.
        image: Page to download; its ``local_path`` is updated in place.
        output_dir: Directory the image file is written into.
        referer: Value sent as the ``Referer`` request header.
        max_retry: Total number of attempts. Values below 1 are treated as 1
            so that the function never reports success without a file.

    Returns:
        The same ``image`` object with ``local_path`` set to the saved file.

    Raises:
        RuntimeError: If every attempt failed with an HTTP or OS error; the
            last underlying error is chained as the cause.
    """
    # Look only at the URL *path*: query strings and fragments must not leak
    # into the extension, and a host-only URL must not yield ".com" etc.
    ext = Path(urlsplit(image.url).path).suffix or ".jpg"
    filepath = output_dir / f"{image.page_number:03d}{ext}"

    # A zero-byte file is the leftover of an interrupted write, not a page,
    # so it is downloaded again instead of being trusted.
    if filepath.is_file() and filepath.stat().st_size > 0:
        image.local_path = str(filepath)
        return image

    # Data is written to a sibling ".part" file and renamed afterwards, so a
    # crash mid-write can never leave a truncated file under the final name.
    part_path = filepath.with_name(filepath.name + ".part")
    attempts = max(1, max_retry)
    last_error: Exception | None = None

    for attempt in range(attempts):
        if attempt:
            # Linear back-off: 1s before the 2nd attempt, 2s before the 3rd...
            await asyncio.sleep(1.0 * attempt)

        try:
            resp = await client.get(
                image.url,
                headers={"Referer": referer},
            )
            resp.raise_for_status()

            filepath.parent.mkdir(parents=True, exist_ok=True)
            try:
                part_path.write_bytes(resp.content)
                part_path.replace(filepath)
            except OSError:
                # Do not leave a stray partial file behind.
                part_path.unlink(missing_ok=True)
                raise
        except (httpx.HTTPError, OSError) as e:
            last_error = e
            continue

        image.local_path = str(filepath)
        return image

    raise RuntimeError(
        f"下载失败 (第{image.page_number}页): {last_error}"
    ) from last_error


async def download_chapter(
    images: list[PageImage],
    output_dir: Path,
    referer: str,
    config: AppConfig | None = None,
) -> list[PageImage]:
    """Download every image of one chapter concurrently.

    Failed pages are reported on stdout and left out of the result; they do
    not abort the remaining downloads.

    Args:
        images: Pages to download (each carrying its URL).
        output_dir: Directory the image files are written into; created if
            missing.
        referer: Value sent as the ``Referer`` request header.
        config: Optional application config supplying the concurrency limit,
            retry count and per-download delay. Defaults are 3, 3 and 1.0
            seconds when omitted.

    Returns:
        The successfully downloaded pages, in input order, with ``local_path``
        set.
    """
    concurrent = 3
    max_retry = 3
    delay = 1.0

    if config is not None:
        concurrent = config.download.concurrent
        max_retry = config.download.retry
        delay = config.download.delay

    output_dir.mkdir(parents=True, exist_ok=True)

    # Non-empty files that were on disk before this run. Pages resolving to
    # one of them are served from disk, so the delay below is skipped for
    # them; otherwise resuming a finished chapter would idle for
    # len(images) * delay / concurrent seconds.
    cached_names = {
        entry.name
        for entry in output_dir.iterdir()
        if entry.is_file() and entry.stat().st_size > 0
    }

    # Semaphore(0) would block forever and a negative value raises, so a
    # misconfigured limit falls back to one download at a time.
    semaphore = asyncio.Semaphore(max(1, concurrent))

    async with httpx.AsyncClient(
        headers=HEADERS,
        follow_redirects=True,
        timeout=60.0,
    ) as client:

        async def _download_with_limit(img: PageImage) -> PageImage:
            async with semaphore:
                result = await download_image(
                    client, img, output_dir, referer, max_retry
                )
                was_cached = (
                    result.local_path is not None
                    and Path(result.local_path).name in cached_names
                )
                if not was_cached:
                    # Sleep while still holding the slot so the delay
                    # actually throttles the request rate.
                    await asyncio.sleep(delay)
                return result

        tasks = [_download_with_limit(img) for img in images]
        results = await asyncio.gather(*tasks, return_exceptions=True)

    downloaded = []
    errors = []
    for r in results:
        # BaseException, not Exception: asyncio.CancelledError derives from
        # BaseException and must not be mistaken for a downloaded page.
        if isinstance(r, BaseException):
            errors.append(r)
        else:
            downloaded.append(r)

    if errors:
        print(f"  ⚠ {len(errors)} 张图片下载失败:")
        for e in errors:
            print(f"    - {e}")

    return downloaded