diff --git a/grawlix/__main__.py b/grawlix/__main__.py index 0b4341f..7208327 100644 --- a/grawlix/__main__.py +++ b/grawlix/__main__.py @@ -164,7 +164,7 @@ async def download_with_progress(book: Book, progress: Progress, template: str, await download_book(book, update_function, template) # Convert PDF-in-epub to PDF if needed (Nextory wraps PDFs in epub containers) - if book.source_data and book.source_data.get('source_name') == 'nextory': + if book.metadata.source == "Nextory": from .output import format_output_location, get_default_format from .output.pdf_converter import convert_pdf_epub_to_pdf, is_pdf_in_epub @@ -175,10 +175,10 @@ async def download_with_progress(book: Book, progress: Progress, template: str, convert_pdf_epub_to_pdf(location) logging.debug(f"Converted PDF-in-epub to PDF: {location}") - # Write metadata if requested and available - if write_metadata and book.source_data: + # Write metadata if requested + if write_metadata: from .output import format_output_location, get_default_format, find_output_format, get_valid_extensions - from .output.metadata import epub_metadata, epub_metadata_writers + from .output.metadata import epub_metadata # Determine output file location _, ext = os.path.splitext(template) @@ -195,17 +195,7 @@ async def download_with_progress(book: Book, progress: Progress, template: str, # Write metadata if it's an EPUB file if location.endswith('.epub') and os.path.exists(location): - # Get source-specific data and transformer - source_name = book.source_data.get('source_name') - source_details = book.source_data.get('details') - - if source_name and source_details: - transformer = epub_metadata_writers.get_transformer(source_name) - if transformer: - transformed_metadata = transformer(source_details) - epub_metadata.write_metadata_to_epub(transformed_metadata, location) - else: - logging.debug(f"No metadata transformer found for source: {source_name}") + epub_metadata.write_metadata_to_epub(book.metadata, location) 
progress.advance(task, 1) diff --git a/grawlix/book.py b/grawlix/book.py index 981e5a7..726eaf8 100644 --- a/grawlix/book.py +++ b/grawlix/book.py @@ -12,10 +12,14 @@ class Metadata: authors: list[str] = field(default_factory=list) language: Optional[str] = None publisher: Optional[str] = None - identifier: Optional[str] = None + isbn: Optional[str] = None description: Optional[str] = None release_date: Optional[date] = None source: Optional[str] = None + original_title: Optional[str] = None + translators: list[str] = field(default_factory=list) + category: Optional[str] = None + tags: list[str] = field(default_factory=list) def as_dict(self) -> dict: return { @@ -23,12 +27,16 @@ class Metadata: "series": self.series or "UNKNOWN", "index": str(self.index) if self.index is not None else "UNKNOWN", "publisher": self.publisher or "UNKNOWN", - "identifier": self.identifier or "UNKNOWN", + "isbn": self.isbn or "UNKNOWN", "language": self.language or "UNKNOWN", "authors": "; ".join(self.authors), "description": self.description or "UNKNOWN", "release_date": self.release_date.isoformat() if self.release_date else "UNKNOWN", "source": self.source or "UNKNOWN", + "original_title": self.original_title or "UNKNOWN", + "translators": "; ".join(self.translators), + "category": self.category or "UNKNOWN", + "tags": "; ".join(self.tags), } @@ -99,7 +107,6 @@ class Book: metadata: Metadata data: BookData overwrite: bool = False - source_data: Optional[dict] = None # For storing source-specific data T = TypeVar("T") diff --git a/grawlix/output/metadata/epub_metadata.py b/grawlix/output/metadata/epub_metadata.py index 62e7abd..626027c 100644 --- a/grawlix/output/metadata/epub_metadata.py +++ b/grawlix/output/metadata/epub_metadata.py @@ -1,38 +1,22 @@ """ Generic EPUB metadata writer -Handles writing standardized metadata to EPUB files from any source +Handles writing metadata to EPUB files from book.Metadata """ from grawlix import logging +from grawlix.book import Metadata import 
zipfile import tempfile import os import shutil -def write_metadata_to_epub(metadata: dict, epub_path: str) -> None: +def write_metadata_to_epub(metadata: Metadata, epub_path: str) -> None: """ - Write standardized metadata to EPUB file + Write metadata to EPUB file - Expected metadata format: - { - "title": str, - "original_title": Optional[str], - "authors": List[str], - "translators": List[str], - "description": Optional[str], - "language": Optional[str], - "publisher": Optional[str], - "isbn": Optional[str], - "release_date": Optional[str], # YYYY-MM-DD format - "category": Optional[str], - "tags": List[str], - "series_name": Optional[str], - "series_index": Optional[int] - } - - :param metadata: Standardized metadata dict + :param metadata: Metadata object from book :param epub_path: Path to the EPUB file """ try: @@ -132,8 +116,8 @@ def _find_opf_file(epub_dir: str) -> str: return None -def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: bool) -> None: - """Update EPUB metadata elements with standardized metadata""" +def _update_epub_metadata(metadata_elem, metadata: Metadata, ns: dict, using_lxml: bool) -> None: + """Update EPUB metadata elements from Metadata object""" # Helper function to create/update element def update_or_create_element(tag: str, text: str, attribs: dict = None): @@ -158,8 +142,8 @@ def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: b elem.set(key, value) # Helper to create meta element - def create_meta(name: str, content: str): - if not content: + def create_meta(name: str, content): + if content is None: return if using_lxml: @@ -173,10 +157,10 @@ def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: b meta.set('content', str(content)) # Title - update_or_create_element(f"{{{ns['dc']}}}title", metadata.get("title")) + update_or_create_element(f"{{{ns['dc']}}}title", metadata.title) # Original Title (EPUB 3 with refinements) - if 
metadata.get("original_title"): + if metadata.original_title: # Create title with ID for main title for elem in list(metadata_elem.findall(f"{{{ns['dc']}}}title", ns)): elem.set('id', 'main-title') @@ -190,7 +174,7 @@ def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: b orig_title = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}title") orig_title.set('id', 'original-title') - orig_title.text = metadata["original_title"] + orig_title.text = metadata.original_title # Add meta refinement for original title if using_lxml: @@ -202,7 +186,7 @@ def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: b meta.text = 'original' # Authors - for author in metadata.get("authors", []): + for author in metadata.authors: if using_lxml: from lxml import etree as ET creator = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}creator") @@ -213,7 +197,7 @@ def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: b creator.set(f"{{{ns['opf']}}}role", "aut") # Translators - for translator in metadata.get("translators", []): + for translator in metadata.translators: if using_lxml: from lxml import etree as ET contributor = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}contributor") @@ -223,18 +207,17 @@ def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: b contributor.text = translator contributor.set(f"{{{ns['opf']}}}role", "trl") - # Description (Unicode is automatically handled by lxml/ET) - update_or_create_element(f"{{{ns['dc']}}}description", metadata.get("description")) + # Description + update_or_create_element(f"{{{ns['dc']}}}description", metadata.description) # Language - update_or_create_element(f"{{{ns['dc']}}}language", metadata.get("language")) + update_or_create_element(f"{{{ns['dc']}}}language", metadata.language) # Publisher - update_or_create_element(f"{{{ns['dc']}}}publisher", metadata.get("publisher")) + update_or_create_element(f"{{{ns['dc']}}}publisher", metadata.publisher) 
- # ISBN - isbn = metadata.get("isbn") - if isbn: + # ISBN (from isbn field) + if metadata.isbn: # Remove existing ISBN identifiers for elem in list(metadata_elem.findall(f"{{{ns['dc']}}}identifier", ns)): scheme = elem.get(f"{{{ns['opf']}}}scheme") @@ -248,25 +231,25 @@ def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: b else: import xml.etree.ElementTree as ET identifier = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}identifier") - identifier.text = isbn + identifier.text = metadata.isbn identifier.set(f"{{{ns['opf']}}}scheme", "ISBN") - # Release Date (already formatted as YYYY-MM-DD) - update_or_create_element(f"{{{ns['dc']}}}date", metadata.get("release_date")) + # Release Date (convert date to string) + release_date_str = metadata.release_date.isoformat() if metadata.release_date else None + update_or_create_element(f"{{{ns['dc']}}}date", release_date_str) # Category - category = metadata.get("category") - if category: + if metadata.category: if using_lxml: from lxml import etree as ET subject = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}subject") else: import xml.etree.ElementTree as ET subject = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}subject") - subject.text = category + subject.text = metadata.category # Tags - for tag in metadata.get("tags", []): + for tag in metadata.tags: if using_lxml: from lxml import etree as ET subject = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}subject") @@ -275,10 +258,10 @@ def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: b subject = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}subject") subject.text = tag - # Series info (Calibre format) - if metadata.get("series_name"): - create_meta("calibre:series", metadata.get("series_name")) - create_meta("calibre:series_index", metadata.get("series_index")) + # Series info (Calibre format) - using series and index fields + if metadata.series: + create_meta("calibre:series", metadata.series) + 
create_meta("calibre:series_index", metadata.index) def _repack_epub(epub_dir: str, output_path: str) -> None: diff --git a/grawlix/output/metadata/epub_metadata_writers.py b/grawlix/output/metadata/epub_metadata_writers.py deleted file mode 100644 index 0f3dc3a..0000000 --- a/grawlix/output/metadata/epub_metadata_writers.py +++ /dev/null @@ -1,119 +0,0 @@ -""" -Source-specific EPUB metadata transformers - -Each source can provide a transformer function that converts their source_data -into a standardized metadata format for EPUB writing. -""" - -from datetime import datetime -from typing import Optional - - -def storytel_transformer(details: dict) -> dict: - """ - Transform Storytel book details JSON into standardized EPUB metadata format - - :param details: Storytel book details JSON - :return: Standardized metadata dict - """ - # Extract ebook format - ebook_format = None - for fmt in details.get("formats", []): - if fmt.get("type") == "ebook": - ebook_format = fmt - break - - metadata = { - "title": details.get("title"), - "original_title": details.get("originalTitle"), - "authors": [author.get("name", "") for author in details.get("authors", [])], - "translators": [translator.get("name", "") for translator in details.get("translators", [])], - "description": details.get("description"), - "language": details.get("language"), - "category": details.get("category", {}).get("name"), - "tags": [tag.get("name", "") for tag in details.get("tags", [])[:10]], # Max 10 - } - - # Ebook-specific metadata - if ebook_format: - metadata["publisher"] = ebook_format.get("publisher", {}).get("name") - metadata["isbn"] = ebook_format.get("isbn") - - release_date = ebook_format.get("releaseDate") - if release_date: - # Format as YYYY-MM-DD - date_obj = datetime.fromisoformat(release_date.replace("Z", "+00:00")) - metadata["release_date"] = date_obj.strftime("%Y-%m-%d") - - # Series info - series_info = details.get("seriesInfo") - if series_info: - metadata["series_name"] = 
series_info.get("name") - metadata["series_index"] = series_info.get("orderInSeries") - - return metadata - - -def nextory_transformer(details: dict) -> dict: - """ - Transform Nextory book details JSON into standardized EPUB metadata format - - :param details: Nextory book details JSON - :return: Standardized metadata dict - """ - # Extract ebook format (epub or pdf - Nextory serves both as epub) - ebook_format = None - for fmt_type in ("epub", "pdf"): - for fmt in details.get("formats", []): - if fmt.get("type") == fmt_type: - ebook_format = fmt - break - if ebook_format: - break - - metadata = { - "title": details.get("title"), - "authors": [author.get("name", "") for author in details.get("authors", [])], - "translators": [translator.get("name", "") for translator in ebook_format.get("translators", [])] if ebook_format else [], - "description": details.get("description_full"), - "language": details.get("language"), - } - - # Format-specific metadata - if ebook_format: - metadata["publisher"] = ebook_format.get("publisher", {}).get("name") - metadata["isbn"] = ebook_format.get("isbn") - - publication_date = ebook_format.get("publication_date") - if publication_date: - # Already in YYYY-MM-DD format - metadata["release_date"] = publication_date - - # Series info - series_info = details.get("series") - if series_info: - metadata["series_name"] = series_info.get("name") - # Nextory uses "volume" at top level, not in series info - volume = details.get("volume") - if volume: - metadata["series_index"] = volume - - return metadata - - -# Registry of transformers by source name -TRANSFORMERS = { - "storytel": storytel_transformer, - "nextory": nextory_transformer, - # Add more sources here as they're implemented -} - - -def get_transformer(source_name: str): - """ - Get the metadata transformer for a given source - - :param source_name: Name of the source (lowercase) - :return: Transformer function or None if not found - """ - return 
TRANSFORMERS.get(source_name.lower()) diff --git a/grawlix/sources/flipp.py b/grawlix/sources/flipp.py index 9bd16a1..cf2fa76 100644 --- a/grawlix/sources/flipp.py +++ b/grawlix/sources/flipp.py @@ -122,7 +122,6 @@ class Flipp(Source): metadata = Metadata( title = f"{metadata['series_name']} {metadata['issueName']}", series = metadata["series_name"], - identifier = issue_id ), ) diff --git a/grawlix/sources/nextory.py b/grawlix/sources/nextory.py index 04fa309..433a3ec 100644 --- a/grawlix/sources/nextory.py +++ b/grawlix/sources/nextory.py @@ -3,7 +3,8 @@ from grawlix.encryption import AESEncryption from grawlix.exceptions import InvalidUrl from .source import Source -from typing import Optional, Tuple +from typing import Tuple +from datetime import date import uuid import base64 @@ -116,18 +117,11 @@ class Nextory(Source): _, format_id = self._find_format(product_data) # Nextory serves all books via epub endpoint regardless of original format data = await self._get_epub_data(format_id) + metadata = self._extract_metadata(product_data) return Book( data = data, - metadata = Metadata( - title = product_data["title"], - authors = [author["name"] for author in product_data["authors"]], - series = self._extract_series_name(product_data), - ), - source_data = { - "source_name": "nextory", - "details": product_data - } + metadata = metadata, ) @@ -150,16 +144,70 @@ class Nextory(Source): for format_type in ("epub", "pdf"): for fmt in product_data["formats"]: if fmt["type"] == format_type: return (format_type, fmt["identifier"]) raise InvalidUrl - @staticmethod - def _extract_series_name(product_info: dict) -> Optional[str]: - series = product_info.get("series") - if series is None: - return None - return series["name"] + def 
_extract_metadata(self, product_data: dict) -> Metadata: + """ + Extract metadata from Nextory product data + + :param product_data: Product data from Nextory API + :return: Metadata object + """ + # Find epub or pdf format for format-specific metadata + ebook_format = None + for fmt_type in ("epub", "pdf"): + for fmt in product_data.get("formats", []): + if fmt.get("type") == fmt_type: + ebook_format = fmt + break + if ebook_format: + break + + # Basic metadata + title = product_data.get("title", "Unknown") + authors = [author["name"] for author in product_data.get("authors", [])] + description = product_data.get("description_full") + language = product_data.get("language") + + # Format-specific metadata + publisher = None + isbn = None + release_date = None + translators = [] + if ebook_format: + publisher = ebook_format.get("publisher", {}).get("name") if ebook_format.get("publisher") else None + isbn = ebook_format.get("isbn") + translators = [t["name"] for t in ebook_format.get("translators", [])] + pub_date = ebook_format.get("publication_date") + if pub_date: + # Format is YYYY-MM-DD + release_date = date.fromisoformat(pub_date) + + # Series info + series = None + index = None + series_info = product_data.get("series") + if series_info: + series = series_info.get("name") + volume = product_data.get("volume") + if volume: + index = volume + + return Metadata( + title=title, + authors=authors, + translators=translators, + language=language, + publisher=publisher, + isbn=isbn, + description=description, + release_date=release_date, + series=series, + index=index, + source="Nextory" + ) async def _get_epub_data(self, epub_id: str) -> BookData: diff --git a/grawlix/sources/storytel.py b/grawlix/sources/storytel.py index 23af290..f5f2566 100644 --- a/grawlix/sources/storytel.py +++ b/grawlix/sources/storytel.py @@ -129,11 +129,7 @@ class Storytel(Source): extension = "epub", headers = self._client.headers ) - ), - source_data = { - "source_name": "storytel", - 
"details": details - } + ) ) return book @@ -154,15 +150,21 @@ class Storytel(Source): # Extract basic metadata title = details.get("title", "Unknown") + original_title = details.get("originalTitle") authors = [author["name"] for author in details.get("authors", [])] + translators = [translator["name"] for translator in details.get("translators", [])] language = details.get("language") description = details.get("description") + category = details.get("category", {}).get("name") if details.get("category") else None + tags = [tag["name"] for tag in details.get("tags", [])[:10]] - # Extract ebook-specific publisher and release date + # Extract ebook-specific publisher, ISBN, and release date publisher = None + isbn = None release_date = None if ebook_format: publisher = ebook_format.get("publisher", {}).get("name") + isbn = ebook_format.get("isbn") release_date_str = ebook_format.get("releaseDate") if release_date_str: # Parse ISO format date @@ -178,13 +180,18 @@ class Storytel(Source): return Metadata( title=title, + original_title=original_title, authors=authors, + translators=translators, language=language, publisher=publisher, + isbn=isbn, description=description, release_date=release_date, series=series, index=index, + category=category, + tags=tags, source="Storytel" )