feat: add chapter page estimation when no links are found

RobBrazier · RobBrazier · commit 111fbfc02491 · 2025-04-23T16:09:55.000+01:00
diff --git a/plugins/manga-chapters/pyproject.toml b/plugins/manga-chapters/pyproject.toml
@@ -10,7 +10,7 @@ description = "Editor Plugin to generate Manga Table of Contents from an Image C
 readme = "README.md"
 authors = [{ name = "Rob Brazier", email = "git+github@brzr.co" }]
 requires-python = ">=3.11"
-dependencies = ["openai>=1.74.0", "pillow>=11.2.1"]
+dependencies = ["openai>=1.74.0"]
 
 [tool.setuptools_scm]
 fallback_version = "0.0.0+unknown"
diff --git a/plugins/manga-chapters/src/manga_chapters/llm.py b/plugins/manga-chapters/src/manga_chapters/llm.py
@@ -1,21 +1,28 @@
 import base64
-from io import BytesIO
+import mimetypes
 
 import openai
-from PIL import Image
 from pydantic import BaseModel
 
-PROMPT = """
+BASE_PROMPT = """
 Look at the image and output the chapters that you see. Focus only on the text content visible in the image. Do not generate any content that isn't visible in the image.
 Format the output in Title Case. Do not assume the ordering based on the first number that is read, only use numbering that is visible.
 
 For numbered chapters (numbered being either numeric (e.g. 1,2,3) or roman numerals (e.g. I,II,X), format as 'Chapter [number]: [title]'. Ensure that all roman numerals are converted to their numerical equivalents.
 For other chapters without numbers, format as '[category]: [title]' when a category is present. Omit the category when not.
+"""
 
-Please can you match the chapter names against the <links> provided, using the page number next to the chapter as reference, omitting any that don't have a page.
+LINKS_PROMPT = """
+Match the chapter names against the <links> provided, using the page number next to the chapter as reference, omitting any that don't have a page.
 IMPORTANT: Do not modify the input links to match chapter numbering on the contents page
 """
 
+PAGES_PROMPT = """
+Estimate the page urls, using the page number next to the chapter as a reference.
+Use <contents-url> as an anchor - this is where the contents page is. The first chapter is usually the page after this.
+The page urls to reference are in <pages> - Please match the chapter names against entries within this list.
+"""
+
 
 class Chapter(BaseModel):
     name: str
@@ -31,19 +38,62 @@ def __init__(self, url: str, model: str, api_key: str) -> None:
         self.client = openai.OpenAI(api_key=api_key, base_url=url)
         self.model = model
 
-    def read_chapters(self, links: list[str], image_bytes: bytes) -> dict[str, str]:
-        image = Image.open(BytesIO(image_bytes))
+    @staticmethod
+    def get_image_url(image_filename: str, image_bytes: bytes) -> str:  # builds a base64 "data:" URL for the image bytes
+        mime_type, _ = mimetypes.guess_type(image_filename)  # type comes from the filename only; None when it cannot be guessed
         encoded_image = base64.b64encode(image_bytes).decode("utf-8")
-        image_url = f"data:{image.get_format_mimetype()};base64,{encoded_image}"
+        image_url = f"data:{mime_type};base64,{encoded_image}"  # NOTE(review): a None mime_type renders as "data:None;..." - consider a fallback
+        return image_url
+
+    @staticmethod
+    def format_response(response: ChapterResponse) -> dict[str, str]:  # flattens the parsed response into {link: chapter name}
+        return {chapter.link: chapter.name for chapter in response.chapters}  # a repeated link keeps only the last name seen
+
+    def read_chapters_without_links(  # used when the contents page has no links: the model picks page urls from <pages>
+        self,
+        image_filename: str,
+        image_bytes: bytes,
+        contents_url: str,
+        pages: list[str],
+    ) -> dict[str, str]:
+        pages_text = "<pages>\n" + "\n".join(pages) + "\n</pages>"  # one candidate page per line, wrapped in <pages> tags
+        image_url = self.get_image_url(image_filename, image_bytes)
+        response = self.client.beta.chat.completions.parse(  # response_format below asks for a ChapterResponse
+            model=self.model,
+            messages=[
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": BASE_PROMPT},
+                        {"type": "text", "text": PAGES_PROMPT},
+                        {"type": "image_url", "image_url": {"url": image_url}},
+                        {
+                            "type": "text",
+                            "text": f"<contents-url>{contents_url}</contents-url>",
+                        },
+                        {"type": "text", "text": pages_text},
+                    ],
+                },
+            ],
+            response_format=ChapterResponse,
+        )
+        content = response.choices[0].message.parsed  # NOTE(review): presumably None if the model refuses or parsing fails - confirm
+        return self.format_response(content)  # returned mapping is {link: chapter name}
+
+    def read_chapters_with_links(
+        self, links: list[str], image_filename: str, image_bytes: bytes
+    ) -> dict[str, str]:
         links_text = "<links>\n" + "\n".join(links) + "\n</links>"
 
+        image_url = self.get_image_url(image_filename, image_bytes)
         response = self.client.beta.chat.completions.parse(
             model=self.model,
             messages=[
                 {
                     "role": "user",
                     "content": [
-                        {"type": "text", "text": PROMPT},
+                        {"type": "text", "text": BASE_PROMPT},
+                        {"type": "text", "text": LINKS_PROMPT},
                         {"type": "image_url", "image_url": {"url": image_url}},
                         {"type": "text", "text": links_text},
                     ],
@@ -52,5 +102,4 @@ def read_chapters(self, links: list[str], image_bytes: bytes) -> dict[str, str]:
             response_format=ChapterResponse,
         )
         content = response.choices[0].message.parsed
-        print(content)
-        return {chapter.link: chapter.name for chapter in content.chapters}
+        return self.format_response(content)
diff --git a/plugins/manga-chapters/src/manga_chapters/main.py b/plugins/manga-chapters/src/manga_chapters/main.py
@@ -1,6 +1,8 @@
 import os
 
 from calibre.customize import Plugin
+from calibre.ebooks.oeb.polish.container import Container
+from lxml.etree import _Element
 from calibre.ebooks.oeb.polish.toc import get_toc, commit_toc
 from calibre.gui2 import error_dialog, question_dialog
 from calibre.gui2.toc.main import TOC
@@ -36,8 +38,33 @@ def _normalise_path(base, path) -> str:
         base_dir = os.path.dirname(base)
         return os.path.normpath(os.path.join(base_dir, path))
 
-    # -> (image url, links, contents toc index)
-    def parse_links(self, toc, container) -> tuple[str, list[str], int]:
+    @staticmethod
+    def _find_image(contents: _Element) -> str | None:  # returns the raw src/href of an image in the page, or None
+        # Look for HTML img tags
+        img_tags = contents.xpath("//*[local-name() = 'img']")  # local-name() matches regardless of XML namespace
+        if img_tags:
+            # Check for src attribute
+            src = img_tags[0].get("src")  # only the first <img> is inspected
+            if src:
+                return src
+
+        # Look for SVG image tags
+        image_tags = contents.xpath("//*[local-name() = 'image']")
+        if image_tags:
+            # SVG images can use href or xlink:href
+            href = image_tags[0].get("href")  # only the first <image> is inspected
+            if href:
+                return href
+
+            # Check for xlink:href which is common in SVG
+            xlink_href = image_tags[0].get("{http://www.w3.org/1999/xlink}href")
+            if xlink_href:
+                return xlink_href
+
+        return None  # no usable image reference found
+
+    # -> (image path or None when the contents page has no image, links, contents toc index, contents url)
+    def parse_links(self, toc, container) -> tuple[str, list[str], int, str]:
         contents_url: str | None = None
         contents_index: int | None = None
         for i, item in enumerate(toc):
@@ -48,43 +75,53 @@ def parse_links(self, toc, container) -> tuple[str, list[str], int]:
                 contents_index = i
                 break
         if not contents_url:
-            raise Exception("Unable to find contents entry")
+            raise Exception(
+                "Unable to find contents page. Please Update ToC to identify Contents page"
+            )
 
         contents = container.parsed(contents_url)
-        image = next(
-            iter(
-                [
-                    self._normalise_path(contents_url, i.get("src"))
-                    for i in contents.xpath("//*[local-name() = 'img']")
-                ]
-            ),
-            None,
-        )
+        image = self._find_image(contents)
+        if image:
+            image = self._normalise_path(contents_url, image)
         links = [
             self._normalise_path(contents_url, a.get("href"))
             for a in contents.xpath("//*[local-name() = 'a'][@href]")
         ]
-        return image, links, contents_index
+        return image, links, contents_index, contents_url
 
     def _get_image_contents(self, container, path) -> bytes:
         return container.raw_data(path, decode=False)
 
-    def _read_chapters(self, links: list[str], image: bytes) -> dict[str, str]:
+    def _read_chapters(  # returns (chapter mapping, estimated flag)
+        self,
+        links: list[str],
+        image_filename: str,
+        image: bytes,
+        contents_url: str,
+        pages: list[str],
+    ) -> (dict[str, str], bool):  # NOTE(review): this is a tuple expression, not a type; tuple[dict[str, str], bool] is the typing form
         from .llm import LLMReader
 
         url = self.prefs["llm_endpoint"]
         model = self.prefs["llm_model"]
         api_key = self.prefs["api_key"]
         reader = LLMReader(url, model, api_key)
-        return reader.read_chapters(links, image)
+        if len(links) > 0:  # links exist on the contents page: match chapters against them
+            return reader.read_chapters_with_links(links, image_filename, image), False
+        return reader.read_chapters_without_links(  # no links: page urls are estimated from the page list
+            image_filename, image, contents_url, pages
+        ), True
 
-    def _confirm_apply(self, changes):
+    def _confirm_apply(self, changes: list[str], estimated: bool):  # asks the user to confirm the generated mappings
         mappings_string = "\n".join(changes)
+        disclaimer = ""
+        if estimated:  # append a warning when the page links were estimated rather than read from the page
+            disclaimer = "\nIMPORTANT: No links were found in the Contents page, so the Pages were estimated. Please validate these are correct."
         return question_dialog(
             self.gui,
             _("Add Generated Chapters?"),
             _(
-                f"Chapter mappings have been successfully generated:\n\n{mappings_string}\n\nContinue with applying?"
+                f"Chapter mappings have been successfully generated:\n\n{mappings_string}\n\nContinue with applying?{disclaimer}"  # NOTE(review): the f-string is interpolated before _() is called
             ),
         )
 
@@ -104,19 +141,27 @@ def _update_toc(self, toc: TOC, contents_idx: int, entries: dict[str, str]):
             self.boss.revert_requested(self.boss.global_undo.previous_container)
             raise
 
+    def get_pages(self, container: Container) -> list[str]:  # candidate pages handed to the LLM when estimating links
+        return container.manifest_items_of_type("application/xhtml+xml")  # NOTE(review): confirm this yields a list rather than an iterator, to match the annotation
+
     def generate_toc(self):
         with self:
             try:
                 self.boss.add_savepoint("Before: Generate ToC")
                 container = self.current_container
                 toc = get_toc(container)
-                image, links, contents_idx = self.parse_links(toc, container)
+                image, links, contents_idx, contents_url = self.parse_links(
+                    toc, container
+                )
+                pages = self.get_pages(container)
                 contents_image = self._get_image_contents(container, image)
-                chapters = self._read_chapters(links, contents_image)
+                chapters, estimated = self._read_chapters(
+                    links, image, contents_image, contents_url, pages
+                )
                 mappings = []
                 for link, chapter in chapters.items():
                     mappings.append(f"{chapter} => {link}")
-                apply = self._confirm_apply(mappings)
+                apply = self._confirm_apply(mappings, estimated)
                 if apply:
                     self._update_toc(toc, contents_idx + 1, chapters)
             except Exception:
diff --git a/uv.lock b/uv.lock