refactor: simplify EPUB metadata architecture

- Remove source_data/transformer pattern in favor of direct Metadata population
- Extend Metadata with new fields: original_title, translators, category, tags
- Rename identifier field to isbn for clarity
- Delete epub_metadata_writers.py (transformers no longer needed)
- Update epub_metadata.py to accept Metadata object instead of dict
- Update Storytel and Nextory sources to populate extended metadata
This commit is contained in:
^_^ 2026-01-14 19:25:37 +01:00
parent ece2d95845
commit 476d460846
7 changed files with 125 additions and 210 deletions

View File

@ -164,7 +164,7 @@ async def download_with_progress(book: Book, progress: Progress, template: str,
await download_book(book, update_function, template)
# Convert PDF-in-epub to PDF if needed (Nextory wraps PDFs in epub containers)
if book.source_data and book.source_data.get('source_name') == 'nextory':
if book.metadata.source == "Nextory":
from .output import format_output_location, get_default_format
from .output.pdf_converter import convert_pdf_epub_to_pdf, is_pdf_in_epub
@ -175,10 +175,10 @@ async def download_with_progress(book: Book, progress: Progress, template: str,
convert_pdf_epub_to_pdf(location)
logging.debug(f"Converted PDF-in-epub to PDF: {location}")
# Write metadata if requested and available
if write_metadata and book.source_data:
# Write metadata if requested
if write_metadata:
from .output import format_output_location, get_default_format, find_output_format, get_valid_extensions
from .output.metadata import epub_metadata, epub_metadata_writers
from .output.metadata import epub_metadata
# Determine output file location
_, ext = os.path.splitext(template)
@ -195,17 +195,7 @@ async def download_with_progress(book: Book, progress: Progress, template: str,
# Write metadata if it's an EPUB file
if location.endswith('.epub') and os.path.exists(location):
# Get source-specific data and transformer
source_name = book.source_data.get('source_name')
source_details = book.source_data.get('details')
if source_name and source_details:
transformer = epub_metadata_writers.get_transformer(source_name)
if transformer:
transformed_metadata = transformer(source_details)
epub_metadata.write_metadata_to_epub(transformed_metadata, location)
else:
logging.debug(f"No metadata transformer found for source: {source_name}")
epub_metadata.write_metadata_to_epub(book.metadata, location)
progress.advance(task, 1)

View File

@ -12,10 +12,14 @@ class Metadata:
authors: list[str] = field(default_factory=list)
language: Optional[str] = None
publisher: Optional[str] = None
identifier: Optional[str] = None
isbn: Optional[str] = None
description: Optional[str] = None
release_date: Optional[date] = None
source: Optional[str] = None
original_title: Optional[str] = None
translators: list[str] = field(default_factory=list)
category: Optional[str] = None
tags: list[str] = field(default_factory=list)
def as_dict(self) -> dict:
return {
@ -23,12 +27,16 @@ class Metadata:
"series": self.series or "UNKNOWN",
"index": str(self.index) if self.index is not None else "UNKNOWN",
"publisher": self.publisher or "UNKNOWN",
"identifier": self.identifier or "UNKNOWN",
"isbn": self.isbn or "UNKNOWN",
"language": self.language or "UNKNOWN",
"authors": "; ".join(self.authors),
"description": self.description or "UNKNOWN",
"release_date": self.release_date.isoformat() if self.release_date else "UNKNOWN",
"source": self.source or "UNKNOWN",
"original_title": self.original_title or "UNKNOWN",
"translators": "; ".join(self.translators),
"category": self.category or "UNKNOWN",
"tags": "; ".join(self.tags),
}
@ -99,7 +107,6 @@ class Book:
metadata: Metadata
data: BookData
overwrite: bool = False
source_data: Optional[dict] = None # For storing source-specific data
T = TypeVar("T")

View File

@ -1,38 +1,22 @@
"""
Generic EPUB metadata writer
Handles writing standardized metadata to EPUB files from any source
Handles writing metadata to EPUB files from book.Metadata
"""
from grawlix import logging
from grawlix.book import Metadata
import zipfile
import tempfile
import os
import shutil
def write_metadata_to_epub(metadata: dict, epub_path: str) -> None:
def write_metadata_to_epub(metadata: Metadata, epub_path: str) -> None:
"""
Write standardized metadata to EPUB file
Write metadata to EPUB file
Expected metadata format:
{
"title": str,
"original_title": Optional[str],
"authors": List[str],
"translators": List[str],
"description": Optional[str],
"language": Optional[str],
"publisher": Optional[str],
"isbn": Optional[str],
"release_date": Optional[str], # YYYY-MM-DD format
"category": Optional[str],
"tags": List[str],
"series_name": Optional[str],
"series_index": Optional[int]
}
:param metadata: Standardized metadata dict
:param metadata: Metadata object from book
:param epub_path: Path to the EPUB file
"""
try:
@ -132,8 +116,8 @@ def _find_opf_file(epub_dir: str) -> str:
return None
def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: bool) -> None:
"""Update EPUB metadata elements with standardized metadata"""
def _update_epub_metadata(metadata_elem, metadata: Metadata, ns: dict, using_lxml: bool) -> None:
"""Update EPUB metadata elements from Metadata object"""
# Helper function to create/update element
def update_or_create_element(tag: str, text: str, attribs: dict = None):
@ -158,8 +142,8 @@ def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: b
elem.set(key, value)
# Helper to create meta element
def create_meta(name: str, content: str):
if not content:
def create_meta(name: str, content):
if content is None:
return
if using_lxml:
@ -173,10 +157,10 @@ def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: b
meta.set('content', str(content))
# Title
update_or_create_element(f"{{{ns['dc']}}}title", metadata.get("title"))
update_or_create_element(f"{{{ns['dc']}}}title", metadata.title)
# Original Title (EPUB 3 with refinements)
if metadata.get("original_title"):
if metadata.original_title:
# Create title with ID for main title
for elem in list(metadata_elem.findall(f"{{{ns['dc']}}}title", ns)):
elem.set('id', 'main-title')
@ -190,7 +174,7 @@ def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: b
orig_title = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}title")
orig_title.set('id', 'original-title')
orig_title.text = metadata["original_title"]
orig_title.text = metadata.original_title
# Add meta refinement for original title
if using_lxml:
@ -202,7 +186,7 @@ def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: b
meta.text = 'original'
# Authors
for author in metadata.get("authors", []):
for author in metadata.authors:
if using_lxml:
from lxml import etree as ET
creator = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}creator")
@ -213,7 +197,7 @@ def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: b
creator.set(f"{{{ns['opf']}}}role", "aut")
# Translators
for translator in metadata.get("translators", []):
for translator in metadata.translators:
if using_lxml:
from lxml import etree as ET
contributor = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}contributor")
@ -223,18 +207,17 @@ def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: b
contributor.text = translator
contributor.set(f"{{{ns['opf']}}}role", "trl")
# Description (Unicode is automatically handled by lxml/ET)
update_or_create_element(f"{{{ns['dc']}}}description", metadata.get("description"))
# Description
update_or_create_element(f"{{{ns['dc']}}}description", metadata.description)
# Language
update_or_create_element(f"{{{ns['dc']}}}language", metadata.get("language"))
update_or_create_element(f"{{{ns['dc']}}}language", metadata.language)
# Publisher
update_or_create_element(f"{{{ns['dc']}}}publisher", metadata.get("publisher"))
update_or_create_element(f"{{{ns['dc']}}}publisher", metadata.publisher)
# ISBN
isbn = metadata.get("isbn")
if isbn:
# ISBN (from identifier field)
if metadata.isbn:
# Remove existing ISBN identifiers
for elem in list(metadata_elem.findall(f"{{{ns['dc']}}}identifier", ns)):
scheme = elem.get(f"{{{ns['opf']}}}scheme")
@ -248,25 +231,25 @@ def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: b
else:
import xml.etree.ElementTree as ET
identifier = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}identifier")
identifier.text = isbn
identifier.text = metadata.isbn
identifier.set(f"{{{ns['opf']}}}scheme", "ISBN")
# Release Date (already formatted as YYYY-MM-DD)
update_or_create_element(f"{{{ns['dc']}}}date", metadata.get("release_date"))
# Release Date (convert date to string)
release_date_str = metadata.release_date.isoformat() if metadata.release_date else None
update_or_create_element(f"{{{ns['dc']}}}date", release_date_str)
# Category
category = metadata.get("category")
if category:
if metadata.category:
if using_lxml:
from lxml import etree as ET
subject = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}subject")
else:
import xml.etree.ElementTree as ET
subject = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}subject")
subject.text = category
subject.text = metadata.category
# Tags
for tag in metadata.get("tags", []):
for tag in metadata.tags:
if using_lxml:
from lxml import etree as ET
subject = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}subject")
@ -275,10 +258,10 @@ def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: b
subject = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}subject")
subject.text = tag
# Series info (Calibre format)
if metadata.get("series_name"):
create_meta("calibre:series", metadata.get("series_name"))
create_meta("calibre:series_index", metadata.get("series_index"))
# Series info (Calibre format) - using series and index fields
if metadata.series:
create_meta("calibre:series", metadata.series)
create_meta("calibre:series_index", metadata.index)
def _repack_epub(epub_dir: str, output_path: str) -> None:

View File

@ -1,119 +0,0 @@
"""
Source-specific EPUB metadata transformers
Each source can provide a transformer function that converts their source_data
into a standardized metadata format for EPUB writing.
"""
from datetime import datetime
from typing import Optional
def storytel_transformer(details: dict) -> dict:
    """
    Transform Storytel book details JSON into standardized EPUB metadata format

    Fields that are present but set to ``None`` (for example ``category``,
    ``publisher``, ``authors``, ``tags``) are treated like missing fields
    instead of raising ``AttributeError``/``TypeError``.

    :param details: Storytel book details JSON
    :return: Standardized metadata dict. ``publisher``, ``isbn`` and
        ``release_date`` are only set when an ebook format exists;
        ``series_name`` and ``series_index`` only when series info exists.
    """
    def names(entries) -> list:
        # Collect the "name" of every entry; a None/missing list yields []
        return [entry.get("name", "") for entry in entries or []]

    # Extract ebook format (first entry whose type is "ebook")
    ebook_format = next(
        (fmt for fmt in details.get("formats") or [] if fmt.get("type") == "ebook"),
        None,
    )

    metadata = {
        "title": details.get("title"),
        "original_title": details.get("originalTitle"),
        "authors": names(details.get("authors")),
        "translators": names(details.get("translators")),
        "description": details.get("description"),
        "language": details.get("language"),
        # `or {}` covers a key that is present with a null value
        "category": (details.get("category") or {}).get("name"),
        "tags": names((details.get("tags") or [])[:10]),  # Max 10
    }

    # Ebook-specific metadata
    if ebook_format:
        metadata["publisher"] = (ebook_format.get("publisher") or {}).get("name")
        metadata["isbn"] = ebook_format.get("isbn")
        release_date = ebook_format.get("releaseDate")
        if release_date:
            # Format as YYYY-MM-DD; a trailing "Z" is rewritten to an explicit
            # UTC offset before parsing
            date_obj = datetime.fromisoformat(release_date.replace("Z", "+00:00"))
            metadata["release_date"] = date_obj.strftime("%Y-%m-%d")

    # Series info
    series_info = details.get("seriesInfo")
    if series_info:
        metadata["series_name"] = series_info.get("name")
        metadata["series_index"] = series_info.get("orderInSeries")

    return metadata
def nextory_transformer(details: dict) -> dict:
    """
    Transform Nextory book details JSON into standardized EPUB metadata format

    Fields that are present but set to ``None`` (``formats``, ``authors``,
    ``translators``, ``publisher``) are treated like missing fields instead
    of raising.

    :param details: Nextory book details JSON
    :return: Standardized metadata dict. ``publisher``, ``isbn`` and
        ``release_date`` are only set when an epub/pdf format exists;
        ``series_name``/``series_index`` only when series info exists.
    """
    formats = details.get("formats") or []

    # Extract ebook format (epub or pdf - Nextory serves both as epub);
    # an epub entry is preferred over a pdf entry
    ebook_format = None
    for fmt_type in ("epub", "pdf"):
        ebook_format = next(
            (fmt for fmt in formats if fmt.get("type") == fmt_type),
            None,
        )
        if ebook_format:
            break

    translators = (ebook_format.get("translators") or []) if ebook_format else []
    metadata = {
        "title": details.get("title"),
        "authors": [author.get("name", "") for author in details.get("authors") or []],
        "translators": [translator.get("name", "") for translator in translators],
        "description": details.get("description_full"),
        "language": details.get("language"),
    }

    # Format-specific metadata
    if ebook_format:
        # `or {}` covers a publisher key that is present with a null value
        metadata["publisher"] = (ebook_format.get("publisher") or {}).get("name")
        metadata["isbn"] = ebook_format.get("isbn")
        publication_date = ebook_format.get("publication_date")
        if publication_date:
            # Already in YYYY-MM-DD format
            metadata["release_date"] = publication_date

    # Series info
    series_info = details.get("series")
    if series_info:
        metadata["series_name"] = series_info.get("name")
        # Nextory uses "volume" at top level, not in series info
        volume = details.get("volume")
        if volume:
            metadata["series_index"] = volume

    return metadata
# Registry of transformers by source name
# Keys are lowercase source names; each value is a function that takes the
# source's raw details dict and returns the standardized metadata dict.
TRANSFORMERS = {
    "storytel": storytel_transformer,
    "nextory": nextory_transformer,
    # Add more sources here as they're implemented
}
def get_transformer(source_name: str):
    """
    Look up the metadata transformer registered for a source

    The name is lowercased before the lookup, so matching is case-insensitive.

    :param source_name: Name of the source
    :return: Transformer function or None if not found
    """
    registry_key = source_name.lower()
    return TRANSFORMERS.get(registry_key)

View File

@ -122,7 +122,6 @@ class Flipp(Source):
metadata = Metadata(
title = f"{metadata['series_name']} {metadata['issueName']}",
series = metadata["series_name"],
identifier = issue_id
),
)

View File

@ -3,7 +3,8 @@ from grawlix.encryption import AESEncryption
from grawlix.exceptions import InvalidUrl
from .source import Source
from typing import Optional, Tuple
from typing import Tuple
from datetime import date
import uuid
import base64
@ -36,7 +37,7 @@ class Nextory(Source):
session_response = await self._client.post(
"https://api.nextory.com/user/v1/sessions",
json = {
"identifier": username,
"isbn": username,
"password": password
},
)
@ -116,18 +117,11 @@ class Nextory(Source):
_, format_id = self._find_format(product_data)
# Nextory serves all books via epub endpoint regardless of original format
data = await self._get_epub_data(format_id)
metadata = self._extract_metadata(product_data)
return Book(
data = data,
metadata = Metadata(
title = product_data["title"],
authors = [author["name"] for author in product_data["authors"]],
series = self._extract_series_name(product_data),
),
source_data = {
"source_name": "nextory",
"details": product_data
}
metadata = metadata,
)
@ -150,16 +144,70 @@ class Nextory(Source):
for format_type in ("epub", "pdf"):
for fmt in product_data["formats"]:
if fmt["type"] == format_type:
return (format_type, fmt["identifier"])
return (format_type, fmt["isbn"])
raise InvalidUrl
@staticmethod
def _extract_series_name(product_info: dict) -> Optional[str]:
series = product_info.get("series")
if series is None:
return None
return series["name"]
def _extract_metadata(self, product_data: dict) -> Metadata:
    """
    Build a Metadata object from Nextory product data

    :param product_data: Product data from Nextory API
    :return: Metadata object
    """
    # Basic metadata
    title = product_data.get("title", "Unknown")
    authors = [person["name"] for person in product_data.get("authors", [])]
    description = product_data.get("description_full")
    language = product_data.get("language")

    # Pick the format entry that carries format-specific metadata;
    # an epub entry wins over a pdf entry
    ebook_format = None
    for wanted_type in ("epub", "pdf"):
        ebook_format = next(
            (
                entry
                for entry in product_data.get("formats", [])
                if entry.get("type") == wanted_type
            ),
            None,
        )
        if ebook_format:
            break

    # Format-specific metadata (stays empty when no epub/pdf entry exists)
    publisher = isbn = release_date = None
    translators = []
    if ebook_format:
        publisher_info = ebook_format.get("publisher")
        publisher = publisher_info.get("name") if publisher_info else None
        isbn = ebook_format.get("isbn")
        translators = [person["name"] for person in ebook_format.get("translators", [])]
        pub_date = ebook_format.get("publication_date")
        # Format is YYYY-MM-DD
        release_date = date.fromisoformat(pub_date) if pub_date else None

    # Series info; the volume is read from the top level of the product data
    series = index = None
    series_info = product_data.get("series")
    if series_info:
        series = series_info.get("name")
        index = product_data.get("volume") or None

    return Metadata(
        source="Nextory",
        title=title,
        authors=authors,
        translators=translators,
        description=description,
        language=language,
        publisher=publisher,
        isbn=isbn,
        release_date=release_date,
        series=series,
        index=index,
    )
async def _get_epub_data(self, epub_id: str) -> BookData:

View File

@ -129,11 +129,7 @@ class Storytel(Source):
extension = "epub",
headers = self._client.headers
)
),
source_data = {
"source_name": "storytel",
"details": details
}
)
)
return book
@ -154,15 +150,21 @@ class Storytel(Source):
# Extract basic metadata
title = details.get("title", "Unknown")
original_title = details.get("originalTitle")
authors = [author["name"] for author in details.get("authors", [])]
translators = [translator["name"] for translator in details.get("translators", [])]
language = details.get("language")
description = details.get("description")
category = details.get("category", {}).get("name") if details.get("category") else None
tags = [tag["name"] for tag in details.get("tags", [])[:10]]
# Extract ebook-specific publisher and release date
# Extract ebook-specific publisher, ISBN, and release date
publisher = None
isbn = None
release_date = None
if ebook_format:
publisher = ebook_format.get("publisher", {}).get("name")
isbn = ebook_format.get("isbn")
release_date_str = ebook_format.get("releaseDate")
if release_date_str:
# Parse ISO format date
@ -178,13 +180,18 @@ class Storytel(Source):
return Metadata(
title=title,
original_title=original_title,
authors=authors,
translators=translators,
language=language,
publisher=publisher,
isbn=isbn,
description=description,
release_date=release_date,
series=series,
index=index,
category=category,
tags=tags,
source="Storytel"
)