Skip to content

Commit 111fbfc

Browse files
committed
feat: add chapter page estimation when no links are found
1 parent 0fdb216 commit 111fbfc

4 files changed

Lines changed: 441 additions & 410 deletions

File tree

plugins/manga-chapters/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ description = "Editor Plugin to generate Manga Table of Contents from an Image C
1010
readme = "README.md"
1111
authors = [{ name = "Rob Brazier", email = "git+github@brzr.co" }]
1212
requires-python = ">=3.11"
13-
dependencies = ["openai>=1.74.0", "pillow>=11.2.1"]
13+
dependencies = ["openai>=1.74.0"]
1414

1515
[tool.setuptools_scm]
1616
fallback_version = "0.0.0+unknown"
Lines changed: 59 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,28 @@
11
import base64
2-
from io import BytesIO
2+
import mimetypes
33

44
import openai
5-
from PIL import Image
65
from pydantic import BaseModel
76

8-
PROMPT = """
7+
BASE_PROMPT = """
98
Look at the image and output the chapters that you see. Focus only on the text content visible in the image. Do not generate any content that isn't visible in the image.
109
Format the output in Title Case. Do not assume the ordering based on the first number that is read, only use numbering that is visible.
1110
1211
For numbered chapters (numbered being either numeric (e.g. 1,2,3) or roman numerals (e.g. I,II,X), format as 'Chapter [number]: [title]'. Ensure that all roman numerals are converted to their numerical equivalents.
1312
For other chapters without numbers, format as '[category]: [title]' when a category is present. Omit the category when not.
13+
"""
1414

15-
Please can you match the chapter names against the <links> provided, using the page number next to the chapter as reference, omitting any that don't have a page.
15+
LINKS_PROMPT = """
16+
Match the chapter names against the <links> provided, using the page number next to the chapter as reference, omitting any that don't have a page.
1617
IMPORTANT: Do not modify the input links to match chapter numbering on the contents page
1718
"""
1819

20+
PAGES_PROMPT = """
21+
Estimate the page urls, using the page number next to the chapter as a reference.
22+
Use <contents-url> as an anchor - this is where the contents page is. The first chapter is usually the page after this.
23+
The page urls to reference are in <pages> - Please match the chapter names against entries within this list.
24+
"""
25+
1926

2027
class Chapter(BaseModel):
2128
name: str
@@ -31,19 +38,62 @@ def __init__(self, url: str, model: str, api_key: str) -> None:
3138
self.client = openai.OpenAI(api_key=api_key, base_url=url)
3239
self.model = model
3340

34-
def read_chapters(self, links: list[str], image_bytes: bytes) -> dict[str, str]:
35-
image = Image.open(BytesIO(image_bytes))
41+
@staticmethod
42+
def get_image_url(image_filename: str, image_bytes: bytes) -> str:
43+
mime_type, _ = mimetypes.guess_type(image_filename)
3644
encoded_image = base64.b64encode(image_bytes).decode("utf-8")
37-
image_url = f"data:{image.get_format_mimetype()};base64,{encoded_image}"
45+
image_url = f"data:{mime_type};base64,{encoded_image}"
46+
return image_url
47+
48+
@staticmethod
def format_response(response: ChapterResponse) -> dict[str, str]:
    """Flatten a parsed chapter response into a ``link -> name`` mapping.

    Args:
        response: Parsed response whose ``chapters`` entries expose ``link``
            and ``name`` attributes.

    Returns:
        A dict keyed by chapter link with the chapter name as value. If the
        same link occurs more than once, the last entry wins.
    """
    mapping: dict[str, str] = {}
    for entry in response.chapters:
        mapping[entry.link] = entry.name
    return mapping
51+
52+
def read_chapters_without_links(
    self,
    image_filename: str,
    image_bytes: bytes,
    contents_url: str,
    pages: list[str],
) -> dict[str, str]:
    """Read chapters from a contents image and estimate each chapter's page.

    Used when the contents page carries no links: the model receives the
    base prompt, the page-estimation prompt, the contents image, the
    contents page location wrapped in ``<contents-url>`` and the candidate
    pages wrapped in ``<pages>``.

    Args:
        image_filename: Name of the contents image, used to derive its MIME
            type for the data URL.
        image_bytes: Raw bytes of the contents image.
        contents_url: Location of the contents page, given to the model as
            the anchor for its estimate.
        pages: Candidate page URLs, sent one per line.

    Returns:
        Mapping of estimated page link to chapter name.

    Raises:
        ValueError: If the model response carries no parsed content (for
            example because the model refused the request).
    """
    pages_text = "<pages>\n" + "\n".join(pages) + "\n</pages>"
    contents_text = f"<contents-url>{contents_url}</contents-url>"
    image_url = self.get_image_url(image_filename, image_bytes)
    response = self.client.beta.chat.completions.parse(
        model=self.model,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": BASE_PROMPT},
                    {"type": "text", "text": PAGES_PROMPT},
                    {"type": "image_url", "image_url": {"url": image_url}},
                    {"type": "text", "text": contents_text},
                    {"type": "text", "text": pages_text},
                ],
            },
        ],
        response_format=ChapterResponse,
    )
    message = response.choices[0].message
    if message.parsed is None:
        # Without this guard the failure shows up later as an opaque
        # AttributeError on ``None.chapters``.
        refusal = getattr(message, "refusal", None)
        raise ValueError(
            f"Model returned no parsed chapters (refusal: {refusal!r})"
        )
    return self.format_response(message.parsed)
82+
83+
def read_chapters_with_links(
84+
self, links: list[str], image_filename: str, image_bytes: bytes
85+
) -> dict[str, str]:
3886
links_text = "<links>\n" + "\n".join(links) + "\n</links>"
3987

88+
image_url = self.get_image_url(image_filename, image_bytes)
4089
response = self.client.beta.chat.completions.parse(
4190
model=self.model,
4291
messages=[
4392
{
4493
"role": "user",
4594
"content": [
46-
{"type": "text", "text": PROMPT},
95+
{"type": "text", "text": BASE_PROMPT},
96+
{"type": "text", "text": LINKS_PROMPT},
4797
{"type": "image_url", "image_url": {"url": image_url}},
4898
{"type": "text", "text": links_text},
4999
],
@@ -52,5 +102,4 @@ def read_chapters(self, links: list[str], image_bytes: bytes) -> dict[str, str]:
52102
response_format=ChapterResponse,
53103
)
54104
content = response.choices[0].message.parsed
55-
print(content)
56-
return {chapter.link: chapter.name for chapter in content.chapters}
105+
return self.format_response(content)

plugins/manga-chapters/src/manga_chapters/main.py

Lines changed: 65 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import os
22

33
from calibre.customize import Plugin
4+
from calibre.ebooks.oeb.polish.container import Container
5+
from lxml.etree import _Element
46
from calibre.ebooks.oeb.polish.toc import get_toc, commit_toc
57
from calibre.gui2 import error_dialog, question_dialog
68
from calibre.gui2.toc.main import TOC
@@ -36,8 +38,33 @@ def _normalise_path(base, path) -> str:
3638
base_dir = os.path.dirname(base)
3739
return os.path.normpath(os.path.join(base_dir, path))
3840

39-
# -> (image url, links, contents toc index)
40-
def parse_links(self, toc, container) -> tuple[str, list[str], int]:
41+
@staticmethod
def _find_image(contents: _Element) -> str | None:
    """Return the source of the first usable image in a parsed page.

    HTML ``img`` elements are preferred; SVG ``image`` elements are
    consulted only if no ``img`` provides a ``src``. Every candidate element
    is examined, so an element without a usable attribute no longer hides a
    later one that has it.

    Args:
        contents: Parsed document to search. Elements are matched by local
            name, so namespaced (XHTML/SVG) documents are handled.

    Returns:
        The raw ``src`` / ``href`` / ``xlink:href`` value as written in the
        document, or ``None`` when no image reference is found.
    """
    # HTML <img src="...">
    for img in contents.xpath("//*[local-name() = 'img']"):
        src = img.get("src")
        if src:
            return src

    # SVG <image>: plain ``href`` first, then the namespaced ``xlink:href``.
    xlink_href = "{http://www.w3.org/1999/xlink}href"
    for svg_image in contents.xpath("//*[local-name() = 'image']"):
        target = svg_image.get("href") or svg_image.get(xlink_href)
        if target:
            return target

    return None
65+
66+
# -> (image url, links, contents toc index, contents url)
67+
def parse_links(self, toc, container) -> tuple[str, list[str], int, str]:
4168
contents_url: str | None = None
4269
contents_index: int | None = None
4370
for i, item in enumerate(toc):
@@ -48,43 +75,53 @@ def parse_links(self, toc, container) -> tuple[str, list[str], int]:
4875
contents_index = i
4976
break
5077
if not contents_url:
51-
raise Exception("Unable to find contents entry")
78+
raise Exception(
79+
"Unable to find contents page. Please Update ToC to identify Contents page"
80+
)
5281

5382
contents = container.parsed(contents_url)
54-
image = next(
55-
iter(
56-
[
57-
self._normalise_path(contents_url, i.get("src"))
58-
for i in contents.xpath("//*[local-name() = 'img']")
59-
]
60-
),
61-
None,
62-
)
83+
image = self._find_image(contents)
84+
if image:
85+
image = self._normalise_path(contents_url, image)
6386
links = [
6487
self._normalise_path(contents_url, a.get("href"))
6588
for a in contents.xpath("//*[local-name() = 'a'][@href]")
6689
]
67-
return image, links, contents_index
90+
return image, links, contents_index, contents_url
6891

6992
def _get_image_contents(self, container, path) -> bytes:
    """Fetch the undecoded contents stored at ``path`` in ``container``.

    Args:
        container: Object exposing ``raw_data(path, decode=...)``.
        path: Name of the item to read.

    Returns:
        Whatever ``container.raw_data`` returns with ``decode=False``
        (annotated as bytes).
    """
    raw = container.raw_data(path, decode=False)
    return raw
7194

72-
def _read_chapters(self, links: list[str], image: bytes) -> dict[str, str]:
95+
def _read_chapters(
    self,
    links: list[str],
    image_filename: str,
    image: bytes,
    contents_url: str,
    pages: list[str],
) -> tuple[dict[str, str], bool]:
    """Ask the configured LLM for the chapters shown on the contents image.

    When the contents page supplied links, chapters are matched against
    them; otherwise their pages are estimated from the list of pages.

    Args:
        links: Links found on the contents page (may be empty).
        image_filename: Name of the contents image.
        image: Raw bytes of the contents image.
        contents_url: Location of the contents page; only used when
            estimating.
        pages: Candidate page names; only used when estimating.

    Returns:
        A ``(chapters, estimated)`` pair: ``chapters`` maps link to chapter
        name, and ``estimated`` is ``True`` when no links were available and
        the pages had to be estimated.
    """
    from .llm import LLMReader

    url = self.prefs["llm_endpoint"]
    model = self.prefs["llm_model"]
    api_key = self.prefs["api_key"]
    reader = LLMReader(url, model, api_key)
    if links:
        return reader.read_chapters_with_links(links, image_filename, image), False
    estimated_chapters = reader.read_chapters_without_links(
        image_filename, image, contents_url, pages
    )
    return estimated_chapters, True
80114

81-
def _confirm_apply(self, changes):
115+
def _confirm_apply(self, changes: list[str], estimated: bool):
    """Ask the user whether the generated chapter mappings should be applied.

    Args:
        changes: One human-readable line per generated mapping.
        estimated: When true, a warning that the pages were estimated is
            appended to the prompt.

    Returns:
        The result of ``question_dialog``.
    """
    disclaimer = (
        "\nIMPORTANT: No links were found in the Contents page, so the Pages were estimated. Please validate these are correct."
        if estimated
        else ""
    )
    mappings_string = "\n".join(changes)
    title = _("Add Generated Chapters?")
    body = _(
        f"Chapter mappings have been successfully generated:\n\n{mappings_string}\n\nContinue with applying?{disclaimer}"
    )
    return question_dialog(self.gui, title, body)
90127

@@ -104,19 +141,27 @@ def _update_toc(self, toc: TOC, contents_idx: int, entries: dict[str, str]):
104141
self.boss.revert_requested(self.boss.global_undo.previous_container)
105142
raise
106143

144+
def get_pages(self, container: Container) -> list[str]:
    """Return the names of the container's XHTML manifest items.

    Args:
        container: Book container to query.

    Returns:
        A list of the manifest item names whose media type is
        ``application/xhtml+xml``, in the order the container yields them.
    """
    # Materialise the result so it matches the annotated ``list[str]`` and
    # can be iterated more than once.
    # NOTE(review): manifest_items_of_type appears to be a generator in
    # calibre, and to follow manifest rather than reading order - confirm.
    return list(container.manifest_items_of_type("application/xhtml+xml"))
146+
107147
def generate_toc(self):
108148
with self:
109149
try:
110150
self.boss.add_savepoint("Before: Generate ToC")
111151
container = self.current_container
112152
toc = get_toc(container)
113-
image, links, contents_idx = self.parse_links(toc, container)
153+
image, links, contents_idx, contents_url = self.parse_links(
154+
toc, container
155+
)
156+
pages = self.get_pages(container)
114157
contents_image = self._get_image_contents(container, image)
115-
chapters = self._read_chapters(links, contents_image)
158+
chapters, estimated = self._read_chapters(
159+
links, image, contents_image, contents_url, pages
160+
)
116161
mappings = []
117162
for link, chapter in chapters.items():
118163
mappings.append(f"{chapter} => {link}")
119-
apply = self._confirm_apply(mappings)
164+
apply = self._confirm_apply(mappings, estimated)
120165
if apply:
121166
self._update_toc(toc, contents_idx + 1, chapters)
122167
except Exception:

0 commit comments

Comments
 (0)