refactor: simplify EPUB metadata architecture

- Remove source_data/transformer pattern in favor of direct Metadata population
- Extend Metadata with new fields: original_title, translators, category, tags
- Rename identifier field to isbn for clarity
- Delete epub_metadata_writers.py (transformers no longer needed)
- Update epub_metadata.py to accept Metadata object instead of dict
- Update Storytel and Nextory sources to populate extended metadata
This commit is contained in:
^_^ 2026-01-14 19:25:37 +01:00
parent ece2d95845
commit 476d460846
7 changed files with 125 additions and 210 deletions

View File

@ -164,7 +164,7 @@ async def download_with_progress(book: Book, progress: Progress, template: str,
await download_book(book, update_function, template) await download_book(book, update_function, template)
# Convert PDF-in-epub to PDF if needed (Nextory wraps PDFs in epub containers) # Convert PDF-in-epub to PDF if needed (Nextory wraps PDFs in epub containers)
if book.source_data and book.source_data.get('source_name') == 'nextory': if book.metadata.source == "Nextory":
from .output import format_output_location, get_default_format from .output import format_output_location, get_default_format
from .output.pdf_converter import convert_pdf_epub_to_pdf, is_pdf_in_epub from .output.pdf_converter import convert_pdf_epub_to_pdf, is_pdf_in_epub
@ -175,10 +175,10 @@ async def download_with_progress(book: Book, progress: Progress, template: str,
convert_pdf_epub_to_pdf(location) convert_pdf_epub_to_pdf(location)
logging.debug(f"Converted PDF-in-epub to PDF: {location}") logging.debug(f"Converted PDF-in-epub to PDF: {location}")
# Write metadata if requested and available # Write metadata if requested
if write_metadata and book.source_data: if write_metadata:
from .output import format_output_location, get_default_format, find_output_format, get_valid_extensions from .output import format_output_location, get_default_format, find_output_format, get_valid_extensions
from .output.metadata import epub_metadata, epub_metadata_writers from .output.metadata import epub_metadata
# Determine output file location # Determine output file location
_, ext = os.path.splitext(template) _, ext = os.path.splitext(template)
@ -195,17 +195,7 @@ async def download_with_progress(book: Book, progress: Progress, template: str,
# Write metadata if it's an EPUB file # Write metadata if it's an EPUB file
if location.endswith('.epub') and os.path.exists(location): if location.endswith('.epub') and os.path.exists(location):
# Get source-specific data and transformer epub_metadata.write_metadata_to_epub(book.metadata, location)
source_name = book.source_data.get('source_name')
source_details = book.source_data.get('details')
if source_name and source_details:
transformer = epub_metadata_writers.get_transformer(source_name)
if transformer:
transformed_metadata = transformer(source_details)
epub_metadata.write_metadata_to_epub(transformed_metadata, location)
else:
logging.debug(f"No metadata transformer found for source: {source_name}")
progress.advance(task, 1) progress.advance(task, 1)

View File

@ -12,10 +12,14 @@ class Metadata:
authors: list[str] = field(default_factory=list) authors: list[str] = field(default_factory=list)
language: Optional[str] = None language: Optional[str] = None
publisher: Optional[str] = None publisher: Optional[str] = None
identifier: Optional[str] = None isbn: Optional[str] = None
description: Optional[str] = None description: Optional[str] = None
release_date: Optional[date] = None release_date: Optional[date] = None
source: Optional[str] = None source: Optional[str] = None
original_title: Optional[str] = None
translators: list[str] = field(default_factory=list)
category: Optional[str] = None
tags: list[str] = field(default_factory=list)
def as_dict(self) -> dict: def as_dict(self) -> dict:
return { return {
@ -23,12 +27,16 @@ class Metadata:
"series": self.series or "UNKNOWN", "series": self.series or "UNKNOWN",
"index": str(self.index) if self.index is not None else "UNKNOWN", "index": str(self.index) if self.index is not None else "UNKNOWN",
"publisher": self.publisher or "UNKNOWN", "publisher": self.publisher or "UNKNOWN",
"identifier": self.identifier or "UNKNOWN", "isbn": self.isbn or "UNKNOWN",
"language": self.language or "UNKNOWN", "language": self.language or "UNKNOWN",
"authors": "; ".join(self.authors), "authors": "; ".join(self.authors),
"description": self.description or "UNKNOWN", "description": self.description or "UNKNOWN",
"release_date": self.release_date.isoformat() if self.release_date else "UNKNOWN", "release_date": self.release_date.isoformat() if self.release_date else "UNKNOWN",
"source": self.source or "UNKNOWN", "source": self.source or "UNKNOWN",
"original_title": self.original_title or "UNKNOWN",
"translators": "; ".join(self.translators),
"category": self.category or "UNKNOWN",
"tags": "; ".join(self.tags),
} }
@ -99,7 +107,6 @@ class Book:
metadata: Metadata metadata: Metadata
data: BookData data: BookData
overwrite: bool = False overwrite: bool = False
source_data: Optional[dict] = None # For storing source-specific data
T = TypeVar("T") T = TypeVar("T")

View File

@ -1,38 +1,22 @@
""" """
Generic EPUB metadata writer Generic EPUB metadata writer
Handles writing standardized metadata to EPUB files from any source Handles writing metadata to EPUB files from book.Metadata
""" """
from grawlix import logging from grawlix import logging
from grawlix.book import Metadata
import zipfile import zipfile
import tempfile import tempfile
import os import os
import shutil import shutil
def write_metadata_to_epub(metadata: dict, epub_path: str) -> None: def write_metadata_to_epub(metadata: Metadata, epub_path: str) -> None:
""" """
Write standardized metadata to EPUB file Write metadata to EPUB file
Expected metadata format: :param metadata: Metadata object from book
{
"title": str,
"original_title": Optional[str],
"authors": List[str],
"translators": List[str],
"description": Optional[str],
"language": Optional[str],
"publisher": Optional[str],
"isbn": Optional[str],
"release_date": Optional[str], # YYYY-MM-DD format
"category": Optional[str],
"tags": List[str],
"series_name": Optional[str],
"series_index": Optional[int]
}
:param metadata: Standardized metadata dict
:param epub_path: Path to the EPUB file :param epub_path: Path to the EPUB file
""" """
try: try:
@ -132,8 +116,8 @@ def _find_opf_file(epub_dir: str) -> str:
return None return None
def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: bool) -> None: def _update_epub_metadata(metadata_elem, metadata: Metadata, ns: dict, using_lxml: bool) -> None:
"""Update EPUB metadata elements with standardized metadata""" """Update EPUB metadata elements from Metadata object"""
# Helper function to create/update element # Helper function to create/update element
def update_or_create_element(tag: str, text: str, attribs: dict = None): def update_or_create_element(tag: str, text: str, attribs: dict = None):
@ -158,8 +142,8 @@ def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: b
elem.set(key, value) elem.set(key, value)
# Helper to create meta element # Helper to create meta element
def create_meta(name: str, content: str): def create_meta(name: str, content):
if not content: if content is None:
return return
if using_lxml: if using_lxml:
@ -173,10 +157,10 @@ def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: b
meta.set('content', str(content)) meta.set('content', str(content))
# Title # Title
update_or_create_element(f"{{{ns['dc']}}}title", metadata.get("title")) update_or_create_element(f"{{{ns['dc']}}}title", metadata.title)
# Original Title (EPUB 3 with refinements) # Original Title (EPUB 3 with refinements)
if metadata.get("original_title"): if metadata.original_title:
# Create title with ID for main title # Create title with ID for main title
for elem in list(metadata_elem.findall(f"{{{ns['dc']}}}title", ns)): for elem in list(metadata_elem.findall(f"{{{ns['dc']}}}title", ns)):
elem.set('id', 'main-title') elem.set('id', 'main-title')
@ -190,7 +174,7 @@ def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: b
orig_title = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}title") orig_title = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}title")
orig_title.set('id', 'original-title') orig_title.set('id', 'original-title')
orig_title.text = metadata["original_title"] orig_title.text = metadata.original_title
# Add meta refinement for original title # Add meta refinement for original title
if using_lxml: if using_lxml:
@ -202,7 +186,7 @@ def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: b
meta.text = 'original' meta.text = 'original'
# Authors # Authors
for author in metadata.get("authors", []): for author in metadata.authors:
if using_lxml: if using_lxml:
from lxml import etree as ET from lxml import etree as ET
creator = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}creator") creator = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}creator")
@ -213,7 +197,7 @@ def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: b
creator.set(f"{{{ns['opf']}}}role", "aut") creator.set(f"{{{ns['opf']}}}role", "aut")
# Translators # Translators
for translator in metadata.get("translators", []): for translator in metadata.translators:
if using_lxml: if using_lxml:
from lxml import etree as ET from lxml import etree as ET
contributor = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}contributor") contributor = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}contributor")
@ -223,18 +207,17 @@ def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: b
contributor.text = translator contributor.text = translator
contributor.set(f"{{{ns['opf']}}}role", "trl") contributor.set(f"{{{ns['opf']}}}role", "trl")
# Description (Unicode is automatically handled by lxml/ET) # Description
update_or_create_element(f"{{{ns['dc']}}}description", metadata.get("description")) update_or_create_element(f"{{{ns['dc']}}}description", metadata.description)
# Language # Language
update_or_create_element(f"{{{ns['dc']}}}language", metadata.get("language")) update_or_create_element(f"{{{ns['dc']}}}language", metadata.language)
# Publisher # Publisher
update_or_create_element(f"{{{ns['dc']}}}publisher", metadata.get("publisher")) update_or_create_element(f"{{{ns['dc']}}}publisher", metadata.publisher)
# ISBN # ISBN (from identifier field)
isbn = metadata.get("isbn") if metadata.isbn:
if isbn:
# Remove existing ISBN identifiers # Remove existing ISBN identifiers
for elem in list(metadata_elem.findall(f"{{{ns['dc']}}}identifier", ns)): for elem in list(metadata_elem.findall(f"{{{ns['dc']}}}identifier", ns)):
scheme = elem.get(f"{{{ns['opf']}}}scheme") scheme = elem.get(f"{{{ns['opf']}}}scheme")
@ -248,25 +231,25 @@ def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: b
else: else:
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
identifier = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}identifier") identifier = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}identifier")
identifier.text = isbn identifier.text = metadata.isbn
identifier.set(f"{{{ns['opf']}}}scheme", "ISBN") identifier.set(f"{{{ns['opf']}}}scheme", "ISBN")
# Release Date (already formatted as YYYY-MM-DD) # Release Date (convert date to string)
update_or_create_element(f"{{{ns['dc']}}}date", metadata.get("release_date")) release_date_str = metadata.release_date.isoformat() if metadata.release_date else None
update_or_create_element(f"{{{ns['dc']}}}date", release_date_str)
# Category # Category
category = metadata.get("category") if metadata.category:
if category:
if using_lxml: if using_lxml:
from lxml import etree as ET from lxml import etree as ET
subject = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}subject") subject = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}subject")
else: else:
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
subject = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}subject") subject = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}subject")
subject.text = category subject.text = metadata.category
# Tags # Tags
for tag in metadata.get("tags", []): for tag in metadata.tags:
if using_lxml: if using_lxml:
from lxml import etree as ET from lxml import etree as ET
subject = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}subject") subject = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}subject")
@ -275,10 +258,10 @@ def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: b
subject = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}subject") subject = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}subject")
subject.text = tag subject.text = tag
# Series info (Calibre format) # Series info (Calibre format) - using series and index fields
if metadata.get("series_name"): if metadata.series:
create_meta("calibre:series", metadata.get("series_name")) create_meta("calibre:series", metadata.series)
create_meta("calibre:series_index", metadata.get("series_index")) create_meta("calibre:series_index", metadata.index)
def _repack_epub(epub_dir: str, output_path: str) -> None: def _repack_epub(epub_dir: str, output_path: str) -> None:

View File

@ -1,119 +0,0 @@
"""
Source-specific EPUB metadata transformers
Each source can provide a transformer function that converts their source_data
into a standardized metadata format for EPUB writing.
"""
from datetime import datetime
from typing import Optional
def storytel_transformer(details: dict) -> dict:
    """
    Transform Storytel book details JSON into standardized EPUB metadata format

    Keys that are present with a JSON null value ("formats", "authors",
    "translators", "tags", "category", "publisher") are treated exactly like
    missing keys instead of raising.

    :param details: Storytel book details JSON
    :return: Standardized metadata dict
    :raises ValueError: If the ebook "releaseDate" cannot be parsed as an ISO timestamp
    """
    # First format entry describing the ebook edition, if any
    ebook_format = next(
        (fmt for fmt in details.get("formats") or [] if fmt.get("type") == "ebook"),
        None,
    )

    # `x.get(key) or default` (rather than `x.get(key, default)`) also covers
    # the case where the API sends an explicit null for the key.
    metadata = {
        "title": details.get("title"),
        "original_title": details.get("originalTitle"),
        "authors": [author.get("name", "") for author in details.get("authors") or []],
        "translators": [
            translator.get("name", "") for translator in details.get("translators") or []
        ],
        "description": details.get("description"),
        "language": details.get("language"),
        "category": (details.get("category") or {}).get("name"),
        "tags": [tag.get("name", "") for tag in (details.get("tags") or [])[:10]],  # Max 10
    }

    # Ebook-specific metadata
    if ebook_format:
        metadata["publisher"] = (ebook_format.get("publisher") or {}).get("name")
        metadata["isbn"] = ebook_format.get("isbn")
        release_date = ebook_format.get("releaseDate")
        if release_date:
            # Rewrite a trailing "Z" as an explicit offset so that
            # datetime.fromisoformat accepts it, then format as YYYY-MM-DD
            date_obj = datetime.fromisoformat(release_date.replace("Z", "+00:00"))
            metadata["release_date"] = date_obj.strftime("%Y-%m-%d")

    # Series info
    series_info = details.get("seriesInfo")
    if series_info:
        metadata["series_name"] = series_info.get("name")
        metadata["series_index"] = series_info.get("orderInSeries")

    return metadata
def nextory_transformer(details: dict) -> dict:
    """
    Transform Nextory book details JSON into standardized EPUB metadata format

    Keys that are present with a JSON null value ("formats", "authors",
    "translators", "publisher") are treated exactly like missing keys instead
    of raising.

    :param details: Nextory book details JSON
    :return: Standardized metadata dict
    """
    formats = details.get("formats") or []

    # Extract ebook format (epub or pdf - Nextory serves both as epub).
    # The outer loop over types makes any epub entry win over any pdf entry.
    ebook_format = next(
        (
            fmt
            for fmt_type in ("epub", "pdf")
            for fmt in formats
            if fmt.get("type") == fmt_type
        ),
        None,
    )

    translators = (ebook_format.get("translators") or []) if ebook_format else []
    metadata = {
        "title": details.get("title"),
        "authors": [author.get("name", "") for author in details.get("authors") or []],
        "translators": [translator.get("name", "") for translator in translators],
        "description": details.get("description_full"),
        "language": details.get("language"),
    }

    # Format-specific metadata
    if ebook_format:
        # `or {}` also covers an explicit null publisher
        metadata["publisher"] = (ebook_format.get("publisher") or {}).get("name")
        metadata["isbn"] = ebook_format.get("isbn")
        publication_date = ebook_format.get("publication_date")
        if publication_date:
            # Already in YYYY-MM-DD format
            metadata["release_date"] = publication_date

    # Series info
    series_info = details.get("series")
    if series_info:
        metadata["series_name"] = series_info.get("name")

    # Nextory uses "volume" at top level, not in series info
    volume = details.get("volume")
    if volume:
        metadata["series_index"] = volume

    return metadata
# Lookup table mapping a lowercase source name to the function that converts
# that source's details JSON into the standardized metadata dict.
# Register additional sources here as they are implemented.
TRANSFORMERS = dict(
    storytel=storytel_transformer,
    nextory=nextory_transformer,
)
def get_transformer(source_name: str):
    """
    Look up the metadata transformer registered for a source

    The name is lowercased before the lookup, so matching is case-insensitive.

    :param source_name: Name of the source
    :return: Transformer function or None if not found
    """
    registry_key = source_name.lower()
    return TRANSFORMERS.get(registry_key)

View File

@ -122,7 +122,6 @@ class Flipp(Source):
metadata = Metadata( metadata = Metadata(
title = f"{metadata['series_name']} {metadata['issueName']}", title = f"{metadata['series_name']} {metadata['issueName']}",
series = metadata["series_name"], series = metadata["series_name"],
identifier = issue_id
), ),
) )

View File

@ -3,7 +3,8 @@ from grawlix.encryption import AESEncryption
from grawlix.exceptions import InvalidUrl from grawlix.exceptions import InvalidUrl
from .source import Source from .source import Source
from typing import Optional, Tuple from typing import Tuple
from datetime import date
import uuid import uuid
import base64 import base64
@ -36,7 +37,7 @@ class Nextory(Source):
session_response = await self._client.post( session_response = await self._client.post(
"https://api.nextory.com/user/v1/sessions", "https://api.nextory.com/user/v1/sessions",
json = { json = {
"identifier": username, "isbn": username,
"password": password "password": password
}, },
) )
@ -116,18 +117,11 @@ class Nextory(Source):
_, format_id = self._find_format(product_data) _, format_id = self._find_format(product_data)
# Nextory serves all books via epub endpoint regardless of original format # Nextory serves all books via epub endpoint regardless of original format
data = await self._get_epub_data(format_id) data = await self._get_epub_data(format_id)
metadata = self._extract_metadata(product_data)
return Book( return Book(
data = data, data = data,
metadata = Metadata( metadata = metadata,
title = product_data["title"],
authors = [author["name"] for author in product_data["authors"]],
series = self._extract_series_name(product_data),
),
source_data = {
"source_name": "nextory",
"details": product_data
}
) )
@ -150,16 +144,70 @@ class Nextory(Source):
for format_type in ("epub", "pdf"): for format_type in ("epub", "pdf"):
for fmt in product_data["formats"]: for fmt in product_data["formats"]:
if fmt["type"] == format_type: if fmt["type"] == format_type:
return (format_type, fmt["identifier"]) return (format_type, fmt["isbn"])
raise InvalidUrl raise InvalidUrl
@staticmethod def _extract_metadata(self, product_data: dict) -> Metadata:
def _extract_series_name(product_info: dict) -> Optional[str]: """
series = product_info.get("series") Extract metadata from Nextory product data
if series is None:
return None :param product_data: Product data from Nextory API
return series["name"] :return: Metadata object
"""
# Find epub or pdf format for format-specific metadata
ebook_format = None
for fmt_type in ("epub", "pdf"):
for fmt in product_data.get("formats", []):
if fmt.get("type") == fmt_type:
ebook_format = fmt
break
if ebook_format:
break
# Basic metadata
title = product_data.get("title", "Unknown")
authors = [author["name"] for author in product_data.get("authors", [])]
description = product_data.get("description_full")
language = product_data.get("language")
# Format-specific metadata
publisher = None
isbn = None
release_date = None
translators = []
if ebook_format:
publisher = ebook_format.get("publisher", {}).get("name") if ebook_format.get("publisher") else None
isbn = ebook_format.get("isbn")
translators = [t["name"] for t in ebook_format.get("translators", [])]
pub_date = ebook_format.get("publication_date")
if pub_date:
# Format is YYYY-MM-DD
release_date = date.fromisoformat(pub_date)
# Series info
series = None
index = None
series_info = product_data.get("series")
if series_info:
series = series_info.get("name")
volume = product_data.get("volume")
if volume:
index = volume
return Metadata(
title=title,
authors=authors,
translators=translators,
language=language,
publisher=publisher,
isbn=isbn,
description=description,
release_date=release_date,
series=series,
index=index,
source="Nextory"
)
async def _get_epub_data(self, epub_id: str) -> BookData: async def _get_epub_data(self, epub_id: str) -> BookData:

View File

@ -129,11 +129,7 @@ class Storytel(Source):
extension = "epub", extension = "epub",
headers = self._client.headers headers = self._client.headers
) )
), )
source_data = {
"source_name": "storytel",
"details": details
}
) )
return book return book
@ -154,15 +150,21 @@ class Storytel(Source):
# Extract basic metadata # Extract basic metadata
title = details.get("title", "Unknown") title = details.get("title", "Unknown")
original_title = details.get("originalTitle")
authors = [author["name"] for author in details.get("authors", [])] authors = [author["name"] for author in details.get("authors", [])]
translators = [translator["name"] for translator in details.get("translators", [])]
language = details.get("language") language = details.get("language")
description = details.get("description") description = details.get("description")
category = details.get("category", {}).get("name") if details.get("category") else None
tags = [tag["name"] for tag in details.get("tags", [])[:10]]
# Extract ebook-specific publisher and release date # Extract ebook-specific publisher, ISBN, and release date
publisher = None publisher = None
isbn = None
release_date = None release_date = None
if ebook_format: if ebook_format:
publisher = ebook_format.get("publisher", {}).get("name") publisher = ebook_format.get("publisher", {}).get("name")
isbn = ebook_format.get("isbn")
release_date_str = ebook_format.get("releaseDate") release_date_str = ebook_format.get("releaseDate")
if release_date_str: if release_date_str:
# Parse ISO format date # Parse ISO format date
@ -178,13 +180,18 @@ class Storytel(Source):
return Metadata( return Metadata(
title=title, title=title,
original_title=original_title,
authors=authors, authors=authors,
translators=translators,
language=language, language=language,
publisher=publisher, publisher=publisher,
isbn=isbn,
description=description, description=description,
release_date=release_date, release_date=release_date,
series=series, series=series,
index=index, index=index,
category=category,
tags=tags,
source="Storytel" source="Storytel"
) )