mirror of
https://github.com/jo1gi/grawlix.git
synced 2026-03-26 12:48:27 -06:00
feat: add fixed-layout support for split EPUB sources
- Add rendition properties to Metadata (layout, spread, orientation)
- Extract and preserve rendition properties when merging EPUB parts
- Fix viewport meta tags for fixed-layout pages
- Use EpubItem instead of EpubHtml to preserve original content

Fix EPUB validation errors in merged output:
- Skip directory entries, mimetype, and META-INF from manifest
- Exclude nav/toc from spine for fixed-layout books

Improve CSS merging across parts:
- Keep longer version of duplicate selectors (more complete rules)
- Return None from _get_css_rule_key for invalid font-faces

Add cover detection fallback:
- Detect cover from largest image on first page when OPF lacks cover info
- Optimize lookup with dict instead of nested loops

Clean up redundant imports in epub_metadata.py
This commit is contained in:
parent
ed8fe9eafa
commit
08ddad3a74
@ -20,6 +20,10 @@ class Metadata:
|
||||
translators: list[str] = field(default_factory=list)
|
||||
category: Optional[str] = None
|
||||
tags: list[str] = field(default_factory=list)
|
||||
# EPUB 3 rendition properties (fixed-layout support)
|
||||
rendition_layout: Optional[str] = None # "pre-paginated" or "reflowable"
|
||||
rendition_spread: Optional[str] = None # "none", "auto", "landscape", "portrait", "both"
|
||||
rendition_orientation: Optional[str] = None # "auto", "landscape", "portrait"
|
||||
|
||||
def as_dict(self) -> dict:
|
||||
return {
|
||||
|
||||
@ -3,12 +3,162 @@ from grawlix.exceptions import UnsupportedOutputFormat
|
||||
from .output_format import OutputFormat, Update
|
||||
|
||||
import asyncio
|
||||
from bs4 import BeautifulSoup
|
||||
import os
|
||||
from ebooklib import epub
|
||||
import re
|
||||
import xml.etree.ElementTree as ET
|
||||
from zipfile import ZipFile
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from ebooklib import epub
|
||||
import rich
|
||||
|
||||
|
||||
def _fix_fixed_layout_page(html_content: bytes, css_content: bytes = None) -> bytes:
|
||||
"""
|
||||
Fix fixed-layout XHTML pages by adding viewport and fixing broken styles.
|
||||
|
||||
Extracts dimensions from CSS and applies them to viewport and inline styles.
|
||||
"""
|
||||
try:
|
||||
html_str = html_content.decode('utf-8')
|
||||
except UnicodeDecodeError:
|
||||
return html_content
|
||||
|
||||
# Extract dimensions from CSS if provided
|
||||
width = None
|
||||
height = None
|
||||
if css_content:
|
||||
try:
|
||||
css_str = css_content.decode('utf-8')
|
||||
# Look for body width/height
|
||||
width_match = re.search(r'body\s*\{[^}]*width:\s*(\d+)px', css_str)
|
||||
height_match = re.search(r'body\s*\{[^}]*height:\s*(\d+)px', css_str)
|
||||
if width_match:
|
||||
width = width_match.group(1)
|
||||
if height_match:
|
||||
height = height_match.group(1)
|
||||
except UnicodeDecodeError:
|
||||
pass
|
||||
|
||||
if not width or not height:
|
||||
return html_content
|
||||
|
||||
# Add viewport meta tag if missing
|
||||
if 'name="viewport"' not in html_str and '<head>' in html_str:
|
||||
viewport_tag = f'<meta name="viewport" content="width={width}, height={height}"/>'
|
||||
html_str = html_str.replace('<head>', f'<head>\n {viewport_tag}', 1)
|
||||
|
||||
# Fix broken inline styles (width:px; height:px;)
|
||||
html_str = re.sub(
|
||||
r'style="width:px;\s*height:px;"',
|
||||
f'style="width:{width}px; height:{height}px;"',
|
||||
html_str
|
||||
)
|
||||
|
||||
return html_str.encode('utf-8')
|
||||
|
||||
|
||||
def _get_css_rule_key(rule_text: str) -> str | None:
|
||||
"""Get unique key for a CSS rule. For @font-face, include font-family."""
|
||||
selector = rule_text.split('{')[0].strip()
|
||||
if selector == '@font-face':
|
||||
# Extract font-family to distinguish different font-faces
|
||||
match = re.search(r'font-family:\s*["\']?([^"\';}]+)', rule_text)
|
||||
if match:
|
||||
return f'@font-face:{match.group(1).strip()}'
|
||||
return None # Skip font-face without font-family
|
||||
return selector if selector else None
|
||||
|
||||
|
||||
def _extract_opf_metadata(opf_content: bytes) -> dict:
    """
    Extract rendition properties, cover info, and spine properties from OPF content.

    Returns dict with keys: rendition_layout, rendition_spread,
    rendition_orientation, cover_id, cover_href, spine_properties.
    All values default to None (spine_properties to {}) and are left at
    their defaults when the OPF cannot be parsed or lacks the element.
    """
    result = {
        'rendition_layout': None,
        'rendition_spread': None,
        'rendition_orientation': None,
        'cover_id': None,
        'cover_href': None,
        'spine_properties': {},  # Maps href -> properties (e.g., 'page-spread-left')
    }

    try:
        root = ET.fromstring(opf_content)
        ns = {
            'opf': 'http://www.idpf.org/2007/opf',
            'dc': 'http://purl.org/dc/elements/1.1/',
        }

        # Find metadata element; fall back to a fully-qualified tag lookup
        # for OPFs whose default namespace isn't registered under the
        # 'opf' prefix.
        metadata = root.find('opf:metadata', ns)
        if metadata is None:
            metadata = root.find('{http://www.idpf.org/2007/opf}metadata')
        if metadata is None:
            return result

        # Extract rendition properties from <meta property="rendition:X">.
        # iter() walks all descendants; the tag check keeps only <meta>
        # elements (namespaced or not).
        for meta in metadata.iter():
            if meta.tag.endswith('}meta') or meta.tag == 'meta':
                prop = meta.get('property', '')
                if prop == 'rendition:layout':
                    result['rendition_layout'] = meta.text
                elif prop == 'rendition:spread':
                    result['rendition_spread'] = meta.text
                elif prop == 'rendition:orientation':
                    result['rendition_orientation'] = meta.text

                # Cover reference: <meta name="cover" content="image-id"/>
                name = meta.get('name', '')
                if name == 'cover':
                    result['cover_id'] = meta.get('content')

        # Parse manifest once for cover info and id->href mapping
        manifest = root.find('opf:manifest', ns)
        if manifest is None:
            manifest = root.find('{http://www.idpf.org/2007/opf}manifest')

        id_to_href = {}
        if manifest is not None:
            for item in manifest.iter():
                item_id = item.get('id')
                item_href = item.get('href')
                if item_id and item_href:
                    id_to_href[item_id] = item_href

                    # Check for cover by ID match (id named by the
                    # <meta name="cover"> entry found above); first match wins.
                    if result['cover_id'] and item_id == result['cover_id'] and not result['cover_href']:
                        result['cover_href'] = item_href

                    # Check for cover-image property (EPUB 3 style); only
                    # used when no cover was found by ID.
                    props = item.get('properties', '')
                    if 'cover-image' in props and not result['cover_href']:
                        result['cover_href'] = item_href
                        result['cover_id'] = item_id

        # Extract spine properties (page-spread-left, page-spread-right)
        spine = root.find('opf:spine', ns)
        if spine is None:
            spine = root.find('{http://www.idpf.org/2007/opf}spine')
        if spine is not None:
            # Extract spine itemref properties; idrefs are resolved to
            # hrefs via the manifest mapping built above, so itemrefs
            # pointing at unknown ids are skipped.
            for itemref in spine.iter():
                if itemref.tag.endswith('}itemref') or itemref.tag == 'itemref':
                    idref = itemref.get('idref')
                    props = itemref.get('properties')
                    if idref and props and idref in id_to_href:
                        href = id_to_href[idref]
                        result['spine_properties'][href] = props

    except ET.ParseError:
        # Malformed OPF: return whatever defaults were collected.
        pass

    return result
|
||||
|
||||
|
||||
class Epub(OutputFormat):
|
||||
extension = "epub"
|
||||
input_types = [SingleFile, HtmlFiles, EpubInParts]
|
||||
@ -88,22 +238,102 @@ class Epub(OutputFormat):
|
||||
progress = 1/(file_count)
|
||||
temporary_file_location = f"{location}.tmp"
|
||||
|
||||
added_files: set[str] = set()
|
||||
def get_new_files(zipfile: ZipFile):
|
||||
"""Returns files in zipfile not already added to file"""
|
||||
for filename in zipfile.namelist():
|
||||
if filename in added_files or filename.endswith(".opf") or filename.endswith(".ncx"):
|
||||
continue
|
||||
yield filename
|
||||
added_files: dict[str, int] = {} # Track filepath -> content size
|
||||
opf_metadata: dict = {}
|
||||
css_cache: dict[str, bytes] = {} # Store CSS content for fixing HTML pages
|
||||
cover_href: str = None # Store cover image path from OPF
|
||||
spine_properties: dict[str, str] = {} # Store spine properties (href -> properties)
|
||||
|
||||
def should_add_file(zipfile: ZipFile, filename: str) -> bool:
|
||||
"""Check if file should be added (new or larger than existing)"""
|
||||
# Skip directory entries, container files (ebooklib handles these), and OPF/NCX
|
||||
if filename.endswith("/"):
|
||||
return False
|
||||
if filename == "mimetype" or filename.startswith("META-INF/"):
|
||||
return False
|
||||
if filename.endswith(".opf") or filename.endswith(".ncx"):
|
||||
return False
|
||||
if filename not in added_files:
|
||||
return True
|
||||
# If file exists, only replace if new version is larger (non-empty beats empty)
|
||||
new_size = zipfile.getinfo(filename).file_size
|
||||
return new_size > added_files[filename]
|
||||
|
||||
output = epub.EpubBook()
|
||||
opf_extracted = False
|
||||
for file in files:
|
||||
await self._download_and_write_file(file, temporary_file_location)
|
||||
with ZipFile(temporary_file_location, "r") as zipfile:
|
||||
for filepath in get_new_files(zipfile):
|
||||
# Extract OPF metadata from first OPF file (before skipping)
|
||||
if not opf_extracted:
|
||||
for filename in zipfile.namelist():
|
||||
if filename.endswith(".opf"):
|
||||
opf_content = zipfile.read(filename)
|
||||
opf_metadata = _extract_opf_metadata(opf_content)
|
||||
# Store rendition properties in metadata
|
||||
if opf_metadata.get('rendition_layout'):
|
||||
metadata.rendition_layout = opf_metadata['rendition_layout']
|
||||
if opf_metadata.get('rendition_spread'):
|
||||
metadata.rendition_spread = opf_metadata['rendition_spread']
|
||||
if opf_metadata.get('rendition_orientation'):
|
||||
metadata.rendition_orientation = opf_metadata['rendition_orientation']
|
||||
if opf_metadata.get('cover_href'):
|
||||
cover_href = opf_metadata['cover_href']
|
||||
if opf_metadata.get('spine_properties'):
|
||||
spine_properties.update(opf_metadata['spine_properties'])
|
||||
opf_extracted = True
|
||||
break
|
||||
|
||||
# Collect CSS files, merging content from all parts
|
||||
for filepath in zipfile.namelist():
|
||||
if filepath.endswith(".css"):
|
||||
content = zipfile.read(filepath)
|
||||
if not content:
|
||||
continue # Skip empty files
|
||||
if filepath not in css_cache:
|
||||
css_cache[filepath] = content
|
||||
else:
|
||||
# Merge: combine rules, keeping the longer version for duplicate selectors
|
||||
existing_str = css_cache[filepath].decode('utf-8', errors='ignore')
|
||||
new_str = content.decode('utf-8', errors='ignore')
|
||||
|
||||
# Parse existing rules into dict: key -> full rule
|
||||
existing_rules = {}
|
||||
for rule in existing_str.split('}'):
|
||||
if '{' in rule:
|
||||
rule_key = _get_css_rule_key(rule)
|
||||
if rule_key:
|
||||
existing_rules[rule_key] = rule.strip() + '}'
|
||||
|
||||
# Process new rules: add new ones, replace if longer
|
||||
for rule in new_str.split('}'):
|
||||
if '{' in rule:
|
||||
rule_key = _get_css_rule_key(rule)
|
||||
if rule_key:
|
||||
new_rule = rule.strip() + '}'
|
||||
if rule_key not in existing_rules or len(new_rule) > len(existing_rules[rule_key]):
|
||||
existing_rules[rule_key] = new_rule
|
||||
|
||||
# Rebuild CSS from merged rules
|
||||
css_cache[filepath] = '\n'.join(existing_rules.values()).encode('utf-8')
|
||||
|
||||
for filepath in zipfile.namelist():
|
||||
# Skip CSS files here - they'll be added after all parts are merged
|
||||
if filepath.endswith(".css"):
|
||||
continue
|
||||
if not should_add_file(zipfile, filepath):
|
||||
continue
|
||||
content = zipfile.read(filepath)
|
||||
file_size = len(content)
|
||||
if filepath.endswith("html"):
|
||||
filename = os.path.basename(filepath)
|
||||
# Fix fixed-layout pages if we have rendition:layout
|
||||
if metadata.rendition_layout == 'pre-paginated':
|
||||
# Find matching CSS (e.g., page1.xhtml -> page1.css)
|
||||
css_path = filepath.replace('.xhtml', '.css').replace('.html', '.css')
|
||||
css_content = css_cache.get(css_path)
|
||||
if css_content:
|
||||
content = _fix_fixed_layout_page(content, css_content)
|
||||
is_in_toc = False
|
||||
title = None
|
||||
for key, value in data.files_in_toc.items():
|
||||
@ -112,13 +342,28 @@ class Epub(OutputFormat):
|
||||
title = value
|
||||
is_in_toc = True
|
||||
break
|
||||
epub_file = epub.EpubHtml(
|
||||
title = title,
|
||||
# Use EpubItem to preserve original content (link tags, viewport, etc.)
|
||||
# EpubHtml parses and regenerates HTML, stripping these
|
||||
epub_file = epub.EpubItem(
|
||||
file_name = filepath,
|
||||
content = content
|
||||
content = content,
|
||||
media_type = 'application/xhtml+xml'
|
||||
)
|
||||
output.add_item(epub_file)
|
||||
output.spine.append(epub_file)
|
||||
# Skip nav.xhtml from spine for fixed-layout (causes blank first page)
|
||||
is_nav = any(x in filepath.lower() for x in ['nav.xhtml', 'nav.html', 'toc.xhtml', 'toc.html'])
|
||||
if not (is_nav and metadata.rendition_layout == 'pre-paginated'):
|
||||
# Check for spine properties (page-spread-left/right)
|
||||
# Try matching with different path variations
|
||||
props = None
|
||||
for href, prop_value in spine_properties.items():
|
||||
if filepath.endswith(href) or href.endswith(os.path.basename(filepath)):
|
||||
props = prop_value
|
||||
break
|
||||
if props:
|
||||
output.spine.append((epub_file, props))
|
||||
else:
|
||||
output.spine.append(epub_file)
|
||||
if is_in_toc:
|
||||
output.toc.append(epub_file)
|
||||
else:
|
||||
@ -127,11 +372,96 @@ class Epub(OutputFormat):
|
||||
content = content
|
||||
)
|
||||
output.add_item(epub_file)
|
||||
added_files.add(filepath)
|
||||
added_files[filepath] = file_size
|
||||
if update:
|
||||
update(progress)
|
||||
os.remove(temporary_file_location)
|
||||
|
||||
# Add merged CSS files after all parts have been processed
|
||||
for css_path, css_content in css_cache.items():
|
||||
css_item = epub.EpubItem(
|
||||
file_name=css_path,
|
||||
content=css_content,
|
||||
media_type='text/css'
|
||||
)
|
||||
output.add_item(css_item)
|
||||
|
||||
# Set cover image if found in source OPF, or detect from first page for fixed-layout
|
||||
if not cover_href and metadata.rendition_layout == 'pre-paginated':
|
||||
# Find first content page from spine (excluding nav/toc)
|
||||
first_page = None
|
||||
for spine_item in output.spine:
|
||||
item = spine_item[0] if isinstance(spine_item, tuple) else spine_item
|
||||
if hasattr(item, 'file_name') and item.file_name:
|
||||
fname = item.file_name.lower()
|
||||
# Skip nav and toc files
|
||||
if 'nav.' in fname or 'toc.' in fname:
|
||||
continue
|
||||
if fname.endswith('.xhtml') or fname.endswith('.html'):
|
||||
first_page = item
|
||||
break
|
||||
|
||||
if first_page and hasattr(first_page, 'content') and first_page.content:
|
||||
# Parse HTML to find all images and pick the largest one
|
||||
try:
|
||||
content = first_page.content.decode('utf-8') if isinstance(first_page.content, bytes) else first_page.content
|
||||
img_matches = re.findall(r'<img[^>]+src=["\']([^"\']+)["\']', content)
|
||||
if img_matches:
|
||||
page_dir = os.path.dirname(first_page.file_name)
|
||||
# Build lookup dict for item sizes
|
||||
item_sizes = {
|
||||
item.file_name: len(item.content)
|
||||
for item in output.items
|
||||
if hasattr(item, 'file_name') and item.file_name
|
||||
and hasattr(item, 'content') and item.content
|
||||
}
|
||||
best_img = None
|
||||
best_size = 0
|
||||
for img_src in img_matches:
|
||||
img_path = os.path.normpath(os.path.join(page_dir, img_src))
|
||||
# Find matching item by suffix
|
||||
for file_name, size in item_sizes.items():
|
||||
if file_name.endswith(img_path):
|
||||
if size > best_size:
|
||||
best_size = size
|
||||
best_img = img_path
|
||||
break
|
||||
if best_img:
|
||||
cover_href = best_img
|
||||
except (UnicodeDecodeError, AttributeError):
|
||||
pass
|
||||
|
||||
if cover_href:
|
||||
# Find the cover image item and mark it as cover
|
||||
for item in output.items:
|
||||
if hasattr(item, 'file_name') and item.file_name and item.file_name.endswith(cover_href):
|
||||
# Get or create item ID
|
||||
item_id = item.id if hasattr(item, 'id') and item.id else os.path.basename(cover_href).replace('.', '-')
|
||||
if not item.id:
|
||||
item.id = item_id
|
||||
# Add EPUB 2 cover metadata: <meta name="cover" content="image-id"/>
|
||||
output.add_metadata('OPF', 'meta', '', {'name': 'cover', 'content': item_id})
|
||||
# Mark item with EPUB 3 cover-image property
|
||||
if not hasattr(item, 'properties') or item.properties is None:
|
||||
item.properties = []
|
||||
if 'cover-image' not in item.properties:
|
||||
item.properties.append('cover-image')
|
||||
break
|
||||
|
||||
# Apply rendition properties to output (fixed-layout support)
|
||||
if metadata.rendition_layout:
|
||||
output.add_metadata(None, 'meta', metadata.rendition_layout, {'property': 'rendition:layout'})
|
||||
if metadata.rendition_spread:
|
||||
output.add_metadata(None, 'meta', metadata.rendition_spread, {'property': 'rendition:spread'})
|
||||
if metadata.rendition_orientation:
|
||||
output.add_metadata(None, 'meta', metadata.rendition_orientation, {'property': 'rendition:orientation'})
|
||||
|
||||
output.add_item(epub.EpubNcx())
|
||||
output.add_item(epub.EpubNav())
|
||||
nav = epub.EpubNav()
|
||||
output.add_item(nav)
|
||||
|
||||
# For fixed-layout, remove nav from spine (it shouldn't be in reading order)
|
||||
if metadata.rendition_layout == 'pre-paginated':
|
||||
output.spine = [item for item in output.spine if item != nav and not (isinstance(item, tuple) and item[0] == nav)]
|
||||
|
||||
epub.write_epub(location, output)
|
||||
|
||||
@ -118,6 +118,10 @@ def _find_opf_file(epub_dir: str) -> str:
|
||||
|
||||
def _update_epub_metadata(metadata_elem, metadata: Metadata, ns: dict, using_lxml: bool) -> None:
|
||||
"""Update EPUB metadata elements from Metadata object"""
|
||||
if using_lxml:
|
||||
from lxml import etree as ET
|
||||
else:
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
# Helper function to create/update element
|
||||
def update_or_create_element(tag: str, text: str, attribs: dict = None):
|
||||
@ -129,13 +133,7 @@ def _update_epub_metadata(metadata_elem, metadata: Metadata, ns: dict, using_lxm
|
||||
metadata_elem.remove(elem)
|
||||
|
||||
# Create new element
|
||||
if using_lxml:
|
||||
from lxml import etree as ET
|
||||
elem = ET.SubElement(metadata_elem, tag)
|
||||
else:
|
||||
import xml.etree.ElementTree as ET
|
||||
elem = ET.SubElement(metadata_elem, tag)
|
||||
|
||||
elem = ET.SubElement(metadata_elem, tag)
|
||||
elem.text = str(text)
|
||||
if attribs:
|
||||
for key, value in attribs.items():
|
||||
@ -145,14 +143,7 @@ def _update_epub_metadata(metadata_elem, metadata: Metadata, ns: dict, using_lxm
|
||||
def create_meta(name: str, content):
|
||||
if content is None:
|
||||
return
|
||||
|
||||
if using_lxml:
|
||||
from lxml import etree as ET
|
||||
meta = ET.SubElement(metadata_elem, f"{{{ns['opf']}}}meta")
|
||||
else:
|
||||
import xml.etree.ElementTree as ET
|
||||
meta = ET.SubElement(metadata_elem, f"{{{ns['opf']}}}meta")
|
||||
|
||||
meta = ET.SubElement(metadata_elem, f"{{{ns['opf']}}}meta")
|
||||
meta.set('name', name)
|
||||
meta.set('content', str(content))
|
||||
|
||||
@ -166,44 +157,25 @@ def _update_epub_metadata(metadata_elem, metadata: Metadata, ns: dict, using_lxm
|
||||
elem.set('id', 'main-title')
|
||||
|
||||
# Add original title
|
||||
if using_lxml:
|
||||
from lxml import etree as ET
|
||||
orig_title = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}title")
|
||||
else:
|
||||
import xml.etree.ElementTree as ET
|
||||
orig_title = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}title")
|
||||
|
||||
orig_title = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}title")
|
||||
orig_title.set('id', 'original-title')
|
||||
orig_title.text = metadata.original_title
|
||||
|
||||
# Add meta refinement for original title
|
||||
if using_lxml:
|
||||
meta = ET.SubElement(metadata_elem, f"{{{ns['opf']}}}meta")
|
||||
else:
|
||||
meta = ET.SubElement(metadata_elem, f"{{{ns['opf']}}}meta")
|
||||
meta = ET.SubElement(metadata_elem, f"{{{ns['opf']}}}meta")
|
||||
meta.set('refines', '#original-title')
|
||||
meta.set('property', 'title-type')
|
||||
meta.text = 'original'
|
||||
|
||||
# Authors
|
||||
for author in metadata.authors:
|
||||
if using_lxml:
|
||||
from lxml import etree as ET
|
||||
creator = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}creator")
|
||||
else:
|
||||
import xml.etree.ElementTree as ET
|
||||
creator = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}creator")
|
||||
creator = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}creator")
|
||||
creator.text = author
|
||||
creator.set(f"{{{ns['opf']}}}role", "aut")
|
||||
|
||||
# Translators
|
||||
for translator in metadata.translators:
|
||||
if using_lxml:
|
||||
from lxml import etree as ET
|
||||
contributor = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}contributor")
|
||||
else:
|
||||
import xml.etree.ElementTree as ET
|
||||
contributor = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}contributor")
|
||||
contributor = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}contributor")
|
||||
contributor.text = translator
|
||||
contributor.set(f"{{{ns['opf']}}}role", "trl")
|
||||
|
||||
@ -225,12 +197,7 @@ def _update_epub_metadata(metadata_elem, metadata: Metadata, ns: dict, using_lxm
|
||||
metadata_elem.remove(elem)
|
||||
|
||||
# Add new ISBN
|
||||
if using_lxml:
|
||||
from lxml import etree as ET
|
||||
identifier = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}identifier")
|
||||
else:
|
||||
import xml.etree.ElementTree as ET
|
||||
identifier = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}identifier")
|
||||
identifier = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}identifier")
|
||||
identifier.text = metadata.isbn
|
||||
identifier.set(f"{{{ns['opf']}}}scheme", "ISBN")
|
||||
|
||||
@ -240,22 +207,12 @@ def _update_epub_metadata(metadata_elem, metadata: Metadata, ns: dict, using_lxm
|
||||
|
||||
# Category
|
||||
if metadata.category:
|
||||
if using_lxml:
|
||||
from lxml import etree as ET
|
||||
subject = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}subject")
|
||||
else:
|
||||
import xml.etree.ElementTree as ET
|
||||
subject = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}subject")
|
||||
subject = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}subject")
|
||||
subject.text = metadata.category
|
||||
|
||||
# Tags
|
||||
for tag in metadata.tags:
|
||||
if using_lxml:
|
||||
from lxml import etree as ET
|
||||
subject = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}subject")
|
||||
else:
|
||||
import xml.etree.ElementTree as ET
|
||||
subject = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}subject")
|
||||
subject = ET.SubElement(metadata_elem, f"{{{ns['dc']}}}subject")
|
||||
subject.text = tag
|
||||
|
||||
# Series info (Calibre format) - using series and index fields
|
||||
@ -263,6 +220,26 @@ def _update_epub_metadata(metadata_elem, metadata: Metadata, ns: dict, using_lxm
|
||||
create_meta("calibre:series", metadata.series)
|
||||
create_meta("calibre:series_index", metadata.index)
|
||||
|
||||
# EPUB 3 rendition properties (fixed-layout support)
|
||||
# These use <meta property="...">value</meta> format, not name/content
|
||||
def create_meta_property(property_name: str, value: str):
|
||||
if not value:
|
||||
return
|
||||
# Remove existing property if present
|
||||
for elem in list(metadata_elem):
|
||||
if elem.get('property') == property_name:
|
||||
metadata_elem.remove(elem)
|
||||
meta = ET.SubElement(metadata_elem, 'meta')
|
||||
meta.set('property', property_name)
|
||||
meta.text = value
|
||||
|
||||
if metadata.rendition_layout:
|
||||
create_meta_property('rendition:layout', metadata.rendition_layout)
|
||||
if metadata.rendition_spread:
|
||||
create_meta_property('rendition:spread', metadata.rendition_spread)
|
||||
if metadata.rendition_orientation:
|
||||
create_meta_property('rendition:orientation', metadata.rendition_orientation)
|
||||
|
||||
|
||||
def _repack_epub(epub_dir: str, output_path: str) -> None:
|
||||
"""Repack EPUB directory into ZIP file"""
|
||||
|
||||
Loading…
Reference in New Issue
Block a user