mirror of
https://github.com/jo1gi/grawlix.git
synced 2026-03-25 20:28:45 -06:00
Added functionality to write metadata to epub. Only for Storytel for now. Can be triggered with parameter or set in config.
Added option to add output path to config file.
This commit is contained in:
parent
f5ee64cb6d
commit
ea7ca71408
109
README.md
109
README.md
@ -50,10 +50,15 @@ grawlix --username "user@example.com" --password "SuperSecretPassword" <url>
|
||||
|
||||
**Config file example**
|
||||
```toml
|
||||
[sources.name]
|
||||
# Global settings
|
||||
write_metadata_to_epub = true
|
||||
output = "~/ebooks/{series}/{index} - {title}.{ext}"
|
||||
|
||||
[sources.storytel]
|
||||
username = "user@example.com"
|
||||
password = "SuperSecretPassword"
|
||||
```
|
||||
|
||||
Config file should be placed in:
|
||||
- Linux: `~/.config/grawlix/grawlix.toml`
|
||||
- macOS: `~/Library/Application Support/grawlix/grawlix.toml`
|
||||
@ -61,14 +66,112 @@ Config file should be placed in:
|
||||
|
||||
### Cookies
|
||||
Some sources can be authenticated with Netscape cookie files. I use
|
||||
[this extension](https://github,com/rotemdan/ExportCookies) to export my
|
||||
[this extension](https://github.com/rotemdan/ExportCookies) to export my
|
||||
cookies from my browser.
|
||||
|
||||
Cookies can be placed in current dir as `cookies.txt` or be given with the
|
||||
`--cookie` argument.
|
||||
`--cookies` argument.
|
||||
|
||||
## Configuration
|
||||
|
||||
### Global Settings
|
||||
|
||||
The following settings can be added to your config file (before any `[sources.*]` sections):
|
||||
|
||||
| Setting | Type | Description | Example |
|
||||
|---------|------|-------------|---------|
|
||||
| `write_metadata_to_epub` | boolean | Automatically write metadata to EPUB files (currently supports Storytel) | `true` or `false` |
|
||||
| `output` | string | Default output path template (supports `~`, environment variables, and template variables) | `"~/ebooks/{title}.{ext}"` |
|
||||
|
||||
### Output Templates
|
||||
|
||||
The `output` setting supports template variables that are replaced with book metadata:
|
||||
|
||||
| Variable | Description | Example |
|
||||
|----------|-------------|---------|
|
||||
| `{title}` | Book title | "The Witcher" |
|
||||
| `{series}` | Series name | "The Witcher Saga" |
|
||||
| `{index}` | Series index/number | "1" |
|
||||
| `{authors}` | Authors (semicolon-separated) | "Andrzej Sapkowski" |
|
||||
| `{publisher}` | Publisher name | "Orbit" |
|
||||
| `{language}` | Language code | "en" |
|
||||
| `{release_date}` | Release date | "2020-01-15" |
|
||||
| `{ext}` | File extension | "epub" |
|
||||
|
||||
**Example templates:**
|
||||
```toml
|
||||
# Simple
|
||||
output = "~/books/{title}.{ext}"
|
||||
|
||||
# Organized by series
|
||||
output = "~/books/{series}/{index} - {title}.{ext}"
|
||||
|
||||
# With author
|
||||
output = "~/books/{authors}/{series}/{title}.{ext}"
|
||||
```
|
||||
|
||||
**Path expansion:**
|
||||
- `~` expands to home directory
|
||||
- Environment variables work: `$HOME` (Unix) or `%USERPROFILE%` (Windows)
|
||||
- Absolute paths: `/path/to/books` or `C:\Books`
|
||||
- Relative paths: `downloads/{title}.{ext}` (relative to current directory)
|
||||
|
||||
## Download books
|
||||
|
||||
To download a book run:
|
||||
```shell
|
||||
grawlix [options] <book url>
|
||||
```
|
||||
|
||||
### Command Line Options
|
||||
|
||||
| Option | Short | Description |
|
||||
|--------|-------|-------------|
|
||||
| `--version` | `-v` | Show version number |
|
||||
| `--file <path>` | `-f` | File with URLs (one per line) |
|
||||
| `--username <email>` | `-u` | Username for authentication |
|
||||
| `--password <password>` | `-p` | Password for authentication |
|
||||
| `--library <name>` | | Library name (for sources that require it) |
|
||||
| `--cookies <path>` | `-c` | Path to Netscape cookie file |
|
||||
| `--output <template>` | `-o` | Output path template (overrides config) |
|
||||
| `--write-metadata-to-epub` | | Write metadata to EPUB files (overrides config) |
|
||||
| `--debug` | | Enable debug messages |
|
||||
|
||||
**Examples:**
|
||||
```shell
|
||||
# Download to specific location
|
||||
grawlix -o "~/downloads/{title}.{ext}" <url>
|
||||
|
||||
# Download with metadata writing
|
||||
grawlix --write-metadata-to-epub <url>
|
||||
|
||||
# Batch download from file
|
||||
grawlix -f urls.txt
|
||||
|
||||
# With authentication
|
||||
grawlix -u user@example.com -p password <url>
|
||||
|
||||
# Debug mode
|
||||
grawlix --debug <url>
|
||||
```
|
||||
|
||||
## Metadata Writing
|
||||
|
||||
For supported sources (currently Storytel), grawlix can write rich metadata to EPUB files including:
|
||||
|
||||
- Title and original title
|
||||
- Authors and translators
|
||||
- Series information (Calibre-compatible)
|
||||
- Publisher, ISBN, language
|
||||
- Description and categories
|
||||
- Release date
|
||||
|
||||
Enable globally in config:
|
||||
```toml
|
||||
write_metadata_to_epub = true
|
||||
```
|
||||
|
||||
Or use the CLI flag for one-time use:
|
||||
```shell
|
||||
grawlix --write-metadata-to-epub <url>
|
||||
```
|
||||
|
||||
@ -12,6 +12,10 @@ from functools import partial
|
||||
import os
|
||||
import asyncio
|
||||
import traceback
|
||||
import warnings
|
||||
|
||||
# Suppress deprecation warnings from dependencies
|
||||
warnings.filterwarnings("ignore", category=UserWarning, module="google.protobuf")
|
||||
|
||||
|
||||
def get_or_ask(attr: str, hidden: bool, source_config: Optional[SourceConfig], options) -> str:
|
||||
@ -107,10 +111,13 @@ async def main() -> None:
|
||||
result = await source.download(url)
|
||||
if isinstance(result, Book):
|
||||
with logging.progress(result.metadata.title, source.name) as progress:
|
||||
template: str = args.output or "{title}.{ext}"
|
||||
await download_with_progress(result, progress, template)
|
||||
# Check CLI flag first, then config file, then default
|
||||
template: str = args.output or config.output or "{title}.{ext}"
|
||||
# Check both CLI flag and config file
|
||||
write_metadata = args.write_metadata_to_epub or config.write_metadata_to_epub
|
||||
await download_with_progress(result, progress, template, write_metadata)
|
||||
elif isinstance(result, Series):
|
||||
await download_series(source, result, args)
|
||||
await download_series(source, result, args, config)
|
||||
logging.info("")
|
||||
except GrawlixError as error:
|
||||
error.print_error()
|
||||
@ -119,34 +126,72 @@ async def main() -> None:
|
||||
exit(1)
|
||||
|
||||
|
||||
async def download_series(source: Source, series: Series, args) -> None:
|
||||
async def download_series(source: Source, series: Series, args, config: Config) -> None:
|
||||
"""
|
||||
Download books in series
|
||||
|
||||
:param series: Series to download
|
||||
:param args: CLI arguments
|
||||
:param config: Configuration
|
||||
"""
|
||||
template = args.output or "{series}/{title}.{ext}"
|
||||
# Check CLI flag first, then config file, then default
|
||||
template = args.output or config.output or "{series}/{title}.{ext}"
|
||||
# Check both CLI flag and config file
|
||||
write_metadata = args.write_metadata_to_epub or config.write_metadata_to_epub
|
||||
with logging.progress(series.title, source.name, len(series.book_ids)) as progress:
|
||||
for book_id in series.book_ids:
|
||||
try:
|
||||
book: Book = await source.download_book_from_id(book_id)
|
||||
await download_with_progress(book, progress, template)
|
||||
await download_with_progress(book, progress, template, write_metadata)
|
||||
except AccessDenied as error:
|
||||
logging.info("Skipping - Access Denied")
|
||||
|
||||
|
||||
|
||||
async def download_with_progress(book: Book, progress: Progress, template: str):
|
||||
async def download_with_progress(book: Book, progress: Progress, template: str, write_metadata: bool = False):
|
||||
"""
|
||||
Download book with progress bar in cli
|
||||
|
||||
:param book: Book to download
|
||||
:param progress: Progress object
|
||||
:param template: Output template
|
||||
:param write_metadata: Whether to write metadata to EPUB files
|
||||
"""
|
||||
task = logging.add_book(progress, book)
|
||||
update_function = partial(progress.advance, task)
|
||||
|
||||
# Download the book
|
||||
await download_book(book, update_function, template)
|
||||
|
||||
# Write metadata if requested and available
|
||||
if write_metadata and book.source_data:
|
||||
from .output import format_output_location, get_default_format, find_output_format
|
||||
from . import epub_metadata, epub_metadata_writers
|
||||
|
||||
# Determine output file location
|
||||
_, ext = os.path.splitext(template)
|
||||
ext = ext[1:]
|
||||
if ext:
|
||||
output_format = find_output_format(book, ext)()
|
||||
else:
|
||||
output_format = get_default_format(book)
|
||||
|
||||
location = format_output_location(book, output_format, template)
|
||||
|
||||
# Write metadata if it's an EPUB file
|
||||
if location.endswith('.epub') and os.path.exists(location):
|
||||
# Get source-specific data and transformer
|
||||
source_name = book.source_data.get('source_name')
|
||||
source_details = book.source_data.get('details')
|
||||
|
||||
if source_name and source_details:
|
||||
transformer = epub_metadata_writers.get_transformer(source_name)
|
||||
if transformer:
|
||||
transformed_metadata = transformer(source_details)
|
||||
epub_metadata.write_metadata_to_epub(transformed_metadata, location)
|
||||
else:
|
||||
logging.debug(f"No metadata transformer found for source: {source_name}")
|
||||
|
||||
progress.advance(task, 1)
|
||||
|
||||
|
||||
|
||||
@ -59,4 +59,11 @@ def parse_arguments() -> argparse.Namespace:
|
||||
dest = "debug",
|
||||
action="store_true",
|
||||
)
|
||||
# Metadata
|
||||
parser.add_argument(
|
||||
'--write-metadata-to-epub',
|
||||
help = "Write metadata to EPUB files when downloading",
|
||||
dest = "write_metadata_to_epub",
|
||||
action="store_true",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
@ -20,7 +20,7 @@ class Metadata:
|
||||
return {
|
||||
"title": self.title,
|
||||
"series": self.series or "UNKNOWN",
|
||||
"index": self.index or "UNKNOWN",
|
||||
"index": str(self.index) if self.index is not None else "UNKNOWN",
|
||||
"publisher": self.publisher or "UNKNOWN",
|
||||
"identifier": self.identifier or "UNKNOWN",
|
||||
"language": self.language or "UNKNOWN",
|
||||
@ -97,6 +97,7 @@ class Book:
|
||||
metadata: Metadata
|
||||
data: BookData
|
||||
overwrite: bool = False
|
||||
source_data: Optional[dict] = None # For storing source-specific data
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
@ -16,6 +16,8 @@ class SourceConfig:
|
||||
class Config:
|
||||
"""Grawlix configuration"""
|
||||
sources: dict[str, SourceConfig]
|
||||
write_metadata_to_epub: bool = False
|
||||
output: Optional[str] = None
|
||||
|
||||
|
||||
def load_config() -> Config:
|
||||
@ -27,8 +29,17 @@ def load_config() -> Config:
|
||||
config_dir = appdirs.user_config_dir("grawlix", "jo1gi")
|
||||
config_file = os.path.join(config_dir, "grawlix.toml")
|
||||
if os.path.exists(config_file):
|
||||
with open(config_file, "rb") as f:
|
||||
config_dict = tomli.load(f)
|
||||
try:
|
||||
with open(config_file, "rb") as f:
|
||||
config_dict = tomli.load(f)
|
||||
except tomli.TOMLDecodeError as e:
|
||||
print(f"Error parsing config file: {config_file}")
|
||||
print(f" {e}")
|
||||
print("\nPlease check your TOML syntax. Common issues:")
|
||||
print(" - Strings must be quoted: output = \"{title}.{ext}\" not output = {title}.{ext}")
|
||||
print(" - Booleans are lowercase: write_metadata_to_epub = true (not True)")
|
||||
print(" - Use double quotes for strings containing special characters")
|
||||
raise
|
||||
else:
|
||||
config_dict = {}
|
||||
sources = {}
|
||||
@ -38,4 +49,9 @@ def load_config() -> Config:
|
||||
username = values.get("username"),
|
||||
password = values.get("password"),
|
||||
)
|
||||
return Config(sources)
|
||||
|
||||
# Load general settings
|
||||
write_metadata_to_epub = config_dict.get("write_metadata_to_epub", False)
|
||||
output = config_dict.get("output")
|
||||
|
||||
return Config(sources, write_metadata_to_epub, output)
|
||||
|
||||
303
grawlix/epub_metadata.py
Normal file
303
grawlix/epub_metadata.py
Normal file
@ -0,0 +1,303 @@
|
||||
"""
|
||||
Generic EPUB metadata writer
|
||||
|
||||
Handles writing standardized metadata to EPUB files from any source
|
||||
"""
|
||||
|
||||
from grawlix import logging
|
||||
import zipfile
|
||||
import tempfile
|
||||
import os
|
||||
import shutil
|
||||
|
||||
|
||||
def write_metadata_to_epub(metadata: dict, epub_path: str) -> None:
    """
    Write standardized metadata to an EPUB file in place.

    Expected metadata format:
    {
        "title": str,
        "original_title": Optional[str],
        "authors": List[str],
        "translators": List[str],
        "description": Optional[str],
        "language": Optional[str],
        "publisher": Optional[str],
        "isbn": Optional[str],
        "release_date": Optional[str],  # YYYY-MM-DD format
        "category": Optional[str],
        "tags": List[str],
        "series_name": Optional[str],
        "series_index": Optional[int]
    }

    :param metadata: Standardized metadata dict
    :param epub_path: Path to the EPUB file
    """
    # Prefer lxml (recovering parser, pretty printing); fall back to stdlib.
    try:
        from lxml import etree as ET
        using_lxml = True
    except ImportError:
        import xml.etree.ElementTree as ET
        using_lxml = False

    # EPUB namespaces
    NAMESPACES = {
        'opf': 'http://www.idpf.org/2007/opf',
        'dc': 'http://purl.org/dc/elements/1.1/',
        'dcterms': 'http://purl.org/dc/terms/',
    }

    # Register namespaces so ElementTree serializes the conventional prefixes
    if not using_lxml:
        for prefix, uri in NAMESPACES.items():
            ET.register_namespace(prefix, uri)

    # TemporaryDirectory guarantees cleanup even on early return or exception
    with tempfile.TemporaryDirectory() as temp_dir:
        # Extract EPUB
        # NOTE(review): extractall on an untrusted archive is exposed to
        # path traversal ("zip slip") on Python < 3.12 — confirm inputs are
        # trusted or validate member names before extraction.
        with zipfile.ZipFile(epub_path, 'r') as zip_ref:
            zip_ref.extractall(temp_dir)

        # Find the OPF package document, which holds the metadata
        opf_path = _find_opf_file(temp_dir)
        if not opf_path:
            logging.debug("Could not find OPF file in EPUB")
            return

        # Parse OPF file (lxml can recover from slightly malformed XML)
        if using_lxml:
            parser = ET.XMLParser(recover=True, encoding='utf-8')
            tree = ET.parse(opf_path, parser)
        else:
            tree = ET.parse(opf_path)

        root = tree.getroot()

        # Find metadata element
        if using_lxml:
            metadata_elem = root.find('.//opf:metadata', NAMESPACES)
        else:
            metadata_elem = root.find('opf:metadata', NAMESPACES)

        if metadata_elem is None:
            logging.debug("Could not find metadata element in OPF")
            return

        # Update metadata in place
        _update_epub_metadata(metadata_elem, metadata, NAMESPACES, using_lxml)

        # Write updated OPF back into the extracted tree
        if using_lxml:
            tree.write(opf_path, encoding='utf-8', xml_declaration=True, pretty_print=True)
        else:
            tree.write(opf_path, encoding='utf-8', xml_declaration=True)

        # Repack EPUB over the original file
        _repack_epub(temp_dir, epub_path)

        logging.debug("Successfully wrote metadata to EPUB")
|
||||
|
||||
|
||||
def _find_opf_file(epub_dir: str) -> str:
|
||||
"""Find the OPF file in extracted EPUB directory"""
|
||||
container_path = os.path.join(epub_dir, 'META-INF', 'container.xml')
|
||||
|
||||
if os.path.exists(container_path):
|
||||
try:
|
||||
from lxml import etree as ET
|
||||
except ImportError:
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
tree = ET.parse(container_path)
|
||||
root = tree.getroot()
|
||||
rootfile = root.find('.//{urn:oasis:names:tc:opendocument:xmlns:container}rootfile')
|
||||
if rootfile is not None:
|
||||
opf_relative_path = rootfile.get('full-path')
|
||||
return os.path.join(epub_dir, opf_relative_path)
|
||||
|
||||
# Fallback: search for .opf file
|
||||
for root_dir, dirs, files in os.walk(epub_dir):
|
||||
for file in files:
|
||||
if file.endswith('.opf'):
|
||||
return os.path.join(root_dir, file)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _update_epub_metadata(metadata_elem, metadata: dict, ns: dict, using_lxml: bool) -> None:
|
||||
"""Update EPUB metadata elements with standardized metadata"""
|
||||
|
||||
# Helper function to create/update element
|
||||
def update_or_create_element(tag: str, text: str, attribs: dict = None):
|
||||
if not text:
|
||||
return
|
||||
|
||||
# Remove existing elements with this tag
|
||||
for elem in list(metadata_elem.findall(tag, ns)):
|
||||
metadata_elem.remove(elem)
|
||||
|
||||
# Create new element
|
||||
if using_lxml:
|
||||
from lxml import etree as ET
|
||||
elem = ET.SubElement(metadata_elem, tag)
|
||||
else:
|
||||
import xml.etree.ElementTree as ET
|
||||
elem = ET.SubElement(metadata_elem, tag)
|
||||
|
||||
elem.text = str(text)
|
||||
if attribs:
|
||||
for key, value in attribs.items():
|
||||
elem.set(key, value)
|
||||
|
||||
# Helper to create meta element
|
||||
def create_meta(name: str, content: str):
|
||||
if not content:
|
||||
return
|
||||
|
||||
if using_lxml:
|
||||
from lxml import etree as ET
|
||||
meta = ET.SubElement(metadata_elem, f"{{{ns['opf']}}}meta")
|
||||
else:
|
||||
import xml.etree.ElementTree as ET
|
||||
meta = ET.SubElement(metadata_elem, f"{{{ns['opf']}}}meta")
|
||||
|
||||
meta.set('name', name)
|
||||
meta.set('content', str(content))
|
||||
|
||||
# Title
|
||||
update_or_create_element(f"{{{ns['dc']}}}title", metadata.get("title"))
|
||||
|
||||
# Original Title (EPUB 3 with refinements)
|
||||
if metadata.get("original_title"):
|
||||
# Create title with ID for main title
|
||||
for elem in list(metadata_elem.findall(f"{{{ns['dc']}}}title", ns)):
|
||||
elem.set('id', 'main-title')
|
||||
|
||||
# Add original title
|
||||
if using_lxml:
|
||||
from lxml import etree as ET
|
||||
orig_title = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}title")
|
||||
else:
|
||||
import xml.etree.ElementTree as ET
|
||||
orig_title = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}title")
|
||||
|
||||
orig_title.set('id', 'original-title')
|
||||
orig_title.text = metadata["original_title"]
|
||||
|
||||
# Add meta refinement for original title
|
||||
if using_lxml:
|
||||
meta = ET.SubElement(metadata_elem, f"{{{ns['opf']}}}meta")
|
||||
else:
|
||||
meta = ET.SubElement(metadata_elem, f"{{{ns['opf']}}}meta")
|
||||
meta.set('refines', '#original-title')
|
||||
meta.set('property', 'title-type')
|
||||
meta.text = 'original'
|
||||
|
||||
# Authors
|
||||
for author in metadata.get("authors", []):
|
||||
if using_lxml:
|
||||
from lxml import etree as ET
|
||||
creator = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}creator")
|
||||
else:
|
||||
import xml.etree.ElementTree as ET
|
||||
creator = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}creator")
|
||||
creator.text = author
|
||||
creator.set(f"{{{ns['opf']}}}role", "aut")
|
||||
|
||||
# Translators
|
||||
for translator in metadata.get("translators", []):
|
||||
if using_lxml:
|
||||
from lxml import etree as ET
|
||||
contributor = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}contributor")
|
||||
else:
|
||||
import xml.etree.ElementTree as ET
|
||||
contributor = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}contributor")
|
||||
contributor.text = translator
|
||||
contributor.set(f"{{{ns['opf']}}}role", "trl")
|
||||
|
||||
# Description (Unicode is automatically handled by lxml/ET)
|
||||
update_or_create_element(f"{{{ns['dc']}}}description", metadata.get("description"))
|
||||
|
||||
# Language
|
||||
update_or_create_element(f"{{{ns['dc']}}}language", metadata.get("language"))
|
||||
|
||||
# Publisher
|
||||
update_or_create_element(f"{{{ns['dc']}}}publisher", metadata.get("publisher"))
|
||||
|
||||
# ISBN
|
||||
isbn = metadata.get("isbn")
|
||||
if isbn:
|
||||
# Remove existing ISBN identifiers
|
||||
for elem in list(metadata_elem.findall(f"{{{ns['dc']}}}identifier", ns)):
|
||||
scheme = elem.get(f"{{{ns['opf']}}}scheme")
|
||||
if scheme and scheme.upper() == "ISBN":
|
||||
metadata_elem.remove(elem)
|
||||
|
||||
# Add new ISBN
|
||||
if using_lxml:
|
||||
from lxml import etree as ET
|
||||
identifier = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}identifier")
|
||||
else:
|
||||
import xml.etree.ElementTree as ET
|
||||
identifier = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}identifier")
|
||||
identifier.text = isbn
|
||||
identifier.set(f"{{{ns['opf']}}}scheme", "ISBN")
|
||||
|
||||
# Release Date (already formatted as YYYY-MM-DD)
|
||||
update_or_create_element(f"{{{ns['dc']}}}date", metadata.get("release_date"))
|
||||
|
||||
# Category
|
||||
category = metadata.get("category")
|
||||
if category:
|
||||
if using_lxml:
|
||||
from lxml import etree as ET
|
||||
subject = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}subject")
|
||||
else:
|
||||
import xml.etree.ElementTree as ET
|
||||
subject = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}subject")
|
||||
subject.text = category
|
||||
|
||||
# Tags
|
||||
for tag in metadata.get("tags", []):
|
||||
if using_lxml:
|
||||
from lxml import etree as ET
|
||||
subject = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}subject")
|
||||
else:
|
||||
import xml.etree.ElementTree as ET
|
||||
subject = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}subject")
|
||||
subject.text = tag
|
||||
|
||||
# Series info (Calibre format)
|
||||
if metadata.get("series_name"):
|
||||
create_meta("calibre:series", metadata.get("series_name"))
|
||||
create_meta("calibre:series_index", metadata.get("series_index"))
|
||||
|
||||
|
||||
def _repack_epub(epub_dir: str, output_path: str) -> None:
|
||||
"""Repack EPUB directory into ZIP file"""
|
||||
# Remove old EPUB
|
||||
if os.path.exists(output_path):
|
||||
os.remove(output_path)
|
||||
|
||||
with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as epub_zip:
|
||||
# mimetype must be first and uncompressed
|
||||
mimetype_path = os.path.join(epub_dir, 'mimetype')
|
||||
if os.path.exists(mimetype_path):
|
||||
epub_zip.write(mimetype_path, 'mimetype', compress_type=zipfile.ZIP_STORED)
|
||||
|
||||
# Add all other files
|
||||
for root, dirs, files in os.walk(epub_dir):
|
||||
for file in files:
|
||||
if file == 'mimetype':
|
||||
continue
|
||||
file_path = os.path.join(root, file)
|
||||
arcname = os.path.relpath(file_path, epub_dir)
|
||||
epub_zip.write(file_path, arcname)
|
||||
71
grawlix/epub_metadata_writers.py
Normal file
71
grawlix/epub_metadata_writers.py
Normal file
@ -0,0 +1,71 @@
|
||||
"""
|
||||
Source-specific EPUB metadata transformers
|
||||
|
||||
Each source can provide a transformer function that converts their source_data
|
||||
into a standardized metadata format for EPUB writing.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def storytel_transformer(details: dict) -> dict:
    """
    Transform Storytel book details JSON into the standardized metadata
    format consumed by the EPUB metadata writer.

    :param details: Storytel book details JSON
    :return: Standardized metadata dict
    """
    # Find the ebook entry among the available formats (audio, ebook, ...)
    ebook_format = next(
        (fmt for fmt in details.get("formats", []) if fmt.get("type") == "ebook"),
        None,
    )

    metadata = {
        "title": details.get("title"),
        "original_title": details.get("originalTitle"),
        "authors": [author.get("name", "") for author in details.get("authors", [])],
        "translators": [translator.get("name", "") for translator in details.get("translators", [])],
        "description": details.get("description"),
        "language": details.get("language"),
        # `or {}` guards an explicit null category in the API response,
        # which dict.get(key, {}) does NOT protect against.
        "category": (details.get("category") or {}).get("name"),
        "tags": [tag.get("name", "") for tag in details.get("tags", [])[:10]],  # Max 10
    }

    # Ebook-specific metadata
    if ebook_format:
        # Same explicit-null guard for the publisher object
        metadata["publisher"] = (ebook_format.get("publisher") or {}).get("name")
        metadata["isbn"] = ebook_format.get("isbn")

        release_date = ebook_format.get("releaseDate")
        if release_date:
            # Storytel dates are ISO 8601 with a trailing Z; emit YYYY-MM-DD
            date_obj = datetime.fromisoformat(release_date.replace("Z", "+00:00"))
            metadata["release_date"] = date_obj.strftime("%Y-%m-%d")

    # Series info
    series_info = details.get("seriesInfo")
    if series_info:
        metadata["series_name"] = series_info.get("name")
        metadata["series_index"] = series_info.get("orderInSeries")

    return metadata


# Registry of transformers by source name
TRANSFORMERS = {
    "storytel": storytel_transformer,
    # Add more sources here as they're implemented
}


def get_transformer(source_name: str):
    """
    Get the metadata transformer for a given source.

    :param source_name: Name of the source (matched case-insensitively)
    :return: Transformer function or None if not found
    """
    # Tolerate a missing/empty source name instead of raising AttributeError
    if not source_name:
        return None
    return TRANSFORMERS.get(source_name.lower())
|
||||
@ -41,11 +41,21 @@ def format_output_location(book: Book, output_format: OutputFormat, template: st
|
||||
|
||||
:param book: Book to download
|
||||
:param output_format: Output format of book
|
||||
:param template: Template for output path
|
||||
:param template: Template for output path (supports ~, environment variables, and absolute paths)
|
||||
:returns: Output path
|
||||
"""
|
||||
values = { key: remove_unwanted_chars(value) for key, value in book.metadata.as_dict().items() }
|
||||
path = template.format(**values, ext = output_format.extension)
|
||||
|
||||
# Expand user home directory (~/... or ~user/...)
|
||||
path = os.path.expanduser(path)
|
||||
|
||||
# Expand environment variables ($VAR or %VAR% depending on OS)
|
||||
path = os.path.expandvars(path)
|
||||
|
||||
# Normalize path separators for current OS
|
||||
path = os.path.normpath(path)
|
||||
|
||||
return path
|
||||
|
||||
|
||||
@ -64,15 +74,65 @@ def remove_strings(input: str, strings: Iterable[str]) -> str:
|
||||
|
||||
def remove_unwanted_chars(input: str) -> str:
|
||||
"""
|
||||
Remove chars from string that are not supported in output path
|
||||
Sanitize string for use in file paths across all operating systems.
|
||||
Replaces forbidden characters with safe alternatives and handles edge cases.
|
||||
|
||||
:param input: The string to remove chars from
|
||||
:returns: input without unsupported chars
|
||||
:param input: The string to sanitize
|
||||
:returns: Safe filename string
|
||||
"""
|
||||
import re
|
||||
|
||||
# Replace null bytes and control characters
|
||||
output = re.sub(r'[\x00-\x1f\x7f]', '', input)
|
||||
|
||||
# Platform-specific forbidden characters - replace with underscore
|
||||
if platform.system() == "Windows":
|
||||
return remove_strings(input, "<>:\"/\\|?*")
|
||||
# Windows forbidden: < > : " / \ | ? *
|
||||
forbidden_chars = '<>:"|?*'
|
||||
for char in forbidden_chars:
|
||||
output = output.replace(char, '_')
|
||||
# Replace slashes with dash for better readability
|
||||
output = output.replace('/', '-')
|
||||
output = output.replace('\\', '-')
|
||||
|
||||
# Windows reserved names (case-insensitive)
|
||||
reserved_names = {
|
||||
'CON', 'PRN', 'AUX', 'NUL',
|
||||
'COM1', 'COM2', 'COM3', 'COM4', 'COM5', 'COM6', 'COM7', 'COM8', 'COM9',
|
||||
'LPT1', 'LPT2', 'LPT3', 'LPT4', 'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9'
|
||||
}
|
||||
# Check if the name (without extension) is reserved
|
||||
name_part = output.split('.')[0].upper()
|
||||
if name_part in reserved_names:
|
||||
output = f"_{output}"
|
||||
|
||||
# Remove trailing spaces and periods (Windows doesn't allow these)
|
||||
output = output.rstrip('. ')
|
||||
|
||||
else:
|
||||
return remove_strings(input, "/")
|
||||
# Unix-like systems (macOS, Linux)
|
||||
# Only / is truly forbidden, but : can cause issues on macOS
|
||||
output = output.replace('/', '-')
|
||||
# Some versions of macOS have issues with :
|
||||
output = output.replace(':', '-')
|
||||
|
||||
# Remove leading/trailing whitespace
|
||||
output = output.strip()
|
||||
|
||||
# Limit filename length (most filesystems have 255 byte limit)
|
||||
# Reserve some space for extensions and numbering
|
||||
max_length = 200
|
||||
if len(output.encode('utf-8')) > max_length:
|
||||
# Truncate while respecting UTF-8 character boundaries
|
||||
output_bytes = output.encode('utf-8')[:max_length]
|
||||
# Decode, ignoring partial characters at the end
|
||||
output = output_bytes.decode('utf-8', errors='ignore').rstrip()
|
||||
|
||||
# Ensure we don't return an empty string
|
||||
if not output:
|
||||
output = "untitled"
|
||||
|
||||
return output
|
||||
|
||||
|
||||
def get_default_format(book: Book) -> OutputFormat:
|
||||
|
||||
@ -9,6 +9,7 @@ from urllib3.util import parse_url
|
||||
from Crypto.Cipher import AES
|
||||
from Crypto.Util.Padding import pad
|
||||
from typing import Any
|
||||
from datetime import datetime
|
||||
|
||||
class Storytel(Source):
|
||||
name: str = "Storytel"
|
||||
@ -48,18 +49,77 @@ class Storytel(Source):
|
||||
f"https://api.storytel.net/book-details/consumables/{book_id}?kidsMode=false&configVariant=default"
|
||||
)
|
||||
details = response.json()
|
||||
logging.debug(f"Full book details JSON: {json.dumps(details, indent=2)}")
|
||||
|
||||
return Book(
|
||||
metadata = Metadata(
|
||||
title = details["title"]
|
||||
),
|
||||
# Extract metadata from details
|
||||
metadata = self._extract_metadata(details)
|
||||
|
||||
book = Book(
|
||||
metadata = metadata,
|
||||
data = SingleFile(
|
||||
OnlineFile(
|
||||
url = epub_url,
|
||||
extension = "epub",
|
||||
headers = self._client.headers
|
||||
)
|
||||
)
|
||||
),
|
||||
source_data = {
|
||||
"source_name": "storytel",
|
||||
"details": details
|
||||
}
|
||||
)
|
||||
return book
|
||||
|
||||
|
||||
def _extract_metadata(self, details: dict) -> Metadata:
    """
    Extract metadata from Storytel book details JSON.

    :param details: Book details from Storytel API
    :return: Metadata object
    """
    from datetime import datetime

    # Find the ebook entry among the available formats (audio, ebook, ...)
    ebook_format = None
    for fmt in details.get("formats", []):
        if fmt.get("type") == "ebook":
            ebook_format = fmt
            break

    # Basic metadata shared by all formats
    title = details.get("title", "Unknown")
    authors = [author["name"] for author in details.get("authors", [])]
    language = details.get("language")
    description = details.get("description")

    # Ebook-specific publisher and release date
    publisher = None
    release_date = None
    if ebook_format:
        # `or {}` guards an explicit null publisher in the API response,
        # which dict.get(key, {}) does NOT protect against.
        publisher = (ebook_format.get("publisher") or {}).get("name")
        release_date_str = ebook_format.get("releaseDate")
        if release_date_str:
            # ISO 8601 with trailing Z -> date
            release_date = datetime.fromisoformat(release_date_str.replace("Z", "+00:00")).date()

    # Series information
    series = None
    index = None
    series_info = details.get("seriesInfo")
    if series_info:
        series = series_info.get("name")
        index = series_info.get("orderInSeries")

    return Metadata(
        title=title,
        authors=authors,
        language=language,
        publisher=publisher,
        description=description,
        release_date=release_date,
        series=series,
        index=index
    )
|
||||
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user