mirror of
https://github.com/jo1gi/grawlix.git
synced 2025-12-16 04:09:10 +00:00
Add code
This commit is contained in:
parent
7bafe32aac
commit
d386cdcd88
4
grawlix/__init__.py
Normal file
4
grawlix/__init__.py
Normal file
@ -0,0 +1,4 @@
|
||||
from .encryption import Encryption, AESEncryption
|
||||
from .book import Book, SingleFile, OnlineFile, Metadata
|
||||
|
||||
__version__ = "0.1.0"
|
||||
79
grawlix/__main__.py
Normal file
79
grawlix/__main__.py
Normal file
@ -0,0 +1,79 @@
|
||||
from .book import Book, Series
|
||||
from .config import load_config, Config, SourceConfig
|
||||
from .exceptions import SourceNotAuthenticated
|
||||
from .sources import find_source, Source
|
||||
from .output import download_book
|
||||
from . import arguments, logging
|
||||
|
||||
from typing import Tuple
|
||||
from rich.progress import Progress
|
||||
from functools import partial
|
||||
|
||||
|
||||
def get_login(source: Source, config: Config, options) -> Tuple[str, str]:
    """
    Resolve the credentials used to log in to a source

    The config entry stored under the lowercased source name is preferred.
    Any value that is missing or empty there is taken from the command
    line options instead.

    :param source: Source to authenticate
    :param config: Content of config file
    :param options: Command line options
    :returns: Tuple of username and password
    """
    key = source.name.lower()
    if key not in config.sources:
        # Nothing configured for this source: only the command line is left
        return options.username, options.password
    stored = config.sources[key]
    return (
        stored.username or options.username,
        stored.password or options.password,
    )
|
||||
|
||||
|
||||
def authenticate(source: Source, config: Config, options):
    """
    Log in to a source with username and password

    :param source: Source to authenticate
    :param config: Content of config file
    :param options: Command line options
    :raises SourceNotAuthenticated: If the source does not support login
    """
    if not source.supports_login:
        raise SourceNotAuthenticated
    username, password = get_login(source, config, options)
    source.login(username, password)
|
||||
|
||||
|
||||
def main() -> None:
    """
    Entry point of the command line interface

    Every url given on the command line is matched to a source, the source
    is authenticated if it requires it, and the resulting book (or every
    book of the resulting series) is downloaded with a progress bar.
    """
    args = arguments.parse_arguments()
    config = load_config()
    for url in args.urls:
        source: Source = find_source(url)
        if source.requires_authentication:
            authenticate(source, config, args)
        result = source.download(url)
        if isinstance(result, Series):
            bar = logging.progress(result.title, source.name, len(result.book_ids))
            with bar as progress:
                # Each book is resolved right before it is downloaded
                for book_id in result.book_ids:
                    book = source.download_book_from_id(book_id)
                    download_with_progress(book, progress)
        elif isinstance(result, Book):
            bar = logging.progress(result.metadata.title, source.name)
            with bar as progress:
                download_with_progress(result, progress)
|
||||
|
||||
|
||||
def download_with_progress(book: Book, progress: Progress):
    """
    Download a single book while reporting to a progress bar

    :param book: Book to download
    :param progress: Progress object the book is added to as a task
    """
    task = logging.add_book(progress, book)
    # The output code reports partial progress through this callback
    advance_task = partial(progress.advance, task)
    download_book(book, advance_task)
    # Advance by the full task total once the download has returned
    progress.advance(task, 1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
37
grawlix/arguments.py
Normal file
37
grawlix/arguments.py
Normal file
@ -0,0 +1,37 @@
|
||||
from grawlix import __version__
|
||||
|
||||
import argparse
|
||||
|
||||
def parse_arguments():
    """
    Parse the command line arguments given to grawlix

    :returns: Namespace with the attributes `urls`, `username` and `password`
    """
    parser = argparse.ArgumentParser(prog="grawlix", description="Download ebooks")
    # Help
    parser.add_argument(
        "-v", "--version",
        action="version",
        version=f"grawlix {__version__}",
    )
    # Basics
    parser.add_argument("urls", help="Links to ebooks", nargs="*")
    # Authentication (argparse derives the destinations from the long options)
    parser.add_argument("-u", "--username", help="Username for login")
    parser.add_argument("-p", "--password", help="Password for login")
    # Outputs
    return parser.parse_args()
|
||||
59
grawlix/book.py
Normal file
59
grawlix/book.py
Normal file
@ -0,0 +1,59 @@
|
||||
from grawlix import Encryption
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional, Union, TypeVar, Generic
|
||||
|
||||
@dataclass(slots=True)
|
||||
class Metadata:
|
||||
"""Metadata about a book"""
|
||||
title: str
|
||||
series: Optional[str] = None
|
||||
publisher: Optional[str] = None
|
||||
identifier: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class OnlineFile:
|
||||
"""Instructions for downloading an online file"""
|
||||
url: str
|
||||
extension: str
|
||||
encryption: Optional[Encryption] = None
|
||||
headers: Optional[dict[str, str]] = None
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SingleFile:
|
||||
"""Bookdata in the form of a single file"""
|
||||
file: OnlineFile
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ImageList:
|
||||
"""
|
||||
List of images
|
||||
Mostly used for comic books
|
||||
"""
|
||||
images: list[OnlineFile]
|
||||
|
||||
BookData = Union[
|
||||
SingleFile,
|
||||
ImageList
|
||||
]
|
||||
|
||||
@dataclass(slots=True)
|
||||
class Book:
|
||||
"""Stores information about a book"""
|
||||
metadata: Metadata
|
||||
data: BookData
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
@dataclass(slots=True)
|
||||
class Series(Generic[T]):
|
||||
"""Stores a series of books"""
|
||||
title: str
|
||||
book_ids: list[T]
|
||||
|
||||
Result = Union[
|
||||
Book,
|
||||
Series[T]
|
||||
]
|
||||
41
grawlix/config.py
Normal file
41
grawlix/config.py
Normal file
@ -0,0 +1,41 @@
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
import tomli
|
||||
import appdirs
|
||||
import os
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SourceConfig:
|
||||
"""Stores configuration for source"""
|
||||
username: Optional[str]
|
||||
password: Optional[str]
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class Config:
|
||||
"""Grawlix configuration"""
|
||||
sources: dict[str, SourceConfig]
|
||||
|
||||
|
||||
def load_config() -> Config:
    """
    Read the grawlix configuration from the user config directory

    The file `grawlix.toml` is read if it exists; otherwise an empty
    configuration is used. Every table below `source` becomes a
    `SourceConfig` keyed by the name of the table.

    :returns: Config object
    """
    config_file = os.path.join(
        appdirs.user_config_dir("grawlix", "jo1gi"),
        "grawlix.toml",
    )
    config_dict = {}
    if os.path.exists(config_file):
        with open(config_file, "rb") as f:
            config_dict = tomli.load(f)
    sources = {
        name: SourceConfig(
            username = values.get("username"),
            password = values.get("password"),
        )
        for name, values in config_dict.get("source", {}).items()
    }
    return Config(sources)
|
||||
38
grawlix/encryption.py
Normal file
38
grawlix/encryption.py
Normal file
@ -0,0 +1,38 @@
|
||||
from Crypto.Cipher import AES
|
||||
from typing import Union
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class AESEncryption:
|
||||
key: bytes
|
||||
iv: bytes
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class XOrEncryption:
|
||||
key: bytes
|
||||
|
||||
Encryption = Union[
|
||||
AESEncryption,
|
||||
XOrEncryption
|
||||
]
|
||||
|
||||
def decrypt(data: bytes, encryption: Encryption) -> bytes:
    """
    Decrypt data with specified encryption algorithm

    AES data is decrypted in CBC mode with the given key and iv and is
    returned exactly as the cipher produces it. Xor data is combined byte
    by byte with the key, which is repeated for as long as needed.

    :param data: Bytes to decrypt
    :param encryption: Information about how to decrypt
    :returns: Decrypted data
    :raises NotImplementedError: If the type of encryption is not supported
    """
    if isinstance(encryption, AESEncryption):
        cipher = AES.new(encryption.key, AES.MODE_CBC, encryption.iv)
        return cipher.decrypt(data)
    if isinstance(encryption, XOrEncryption):
        key = encryption.key
        key_length = len(key)
        return bytes(
            byte ^ key[index % key_length]
            for index, byte in enumerate(data)
        )
    # `NotImplemented` is a comparison sentinel and cannot be raised;
    # the exception class is `NotImplementedError`
    raise NotImplementedError(
        f"Unsupported encryption type: {type(encryption).__name__}"
    )
|
||||
20
grawlix/exceptions.py
Normal file
20
grawlix/exceptions.py
Normal file
@ -0,0 +1,20 @@
|
||||
class GrawlixError(Exception):
    """Base class of all errors defined by grawlix"""


class DataNotFound(GrawlixError):
    """Expected data could not be found"""


class InvalidUrl(GrawlixError):
    """The url is not valid for the source handling it"""


class UnsupportedOutputFormat(GrawlixError):
    """The output format does not support the data it was given"""


class NoSourceFound(GrawlixError):
    """No source matches the url"""


class SourceNotAuthenticated(GrawlixError):
    """The source could not be authenticated"""


class MissingArgument(GrawlixError):
    """A required argument is missing"""
|
||||
31
grawlix/logging.py
Normal file
31
grawlix/logging.py
Normal file
@ -0,0 +1,31 @@
|
||||
from grawlix.book import Book
|
||||
|
||||
from rich.console import Console
|
||||
from rich.progress import Progress, BarColumn, ProgressColumn, TaskID, SpinnerColumn
|
||||
import rich
|
||||
|
||||
from typing import Union
|
||||
from dataclasses import dataclass
|
||||
|
||||
console = Console(stderr=True)
|
||||
|
||||
def progress(category_name: str, source_name: str, count=1) -> Progress:
    """
    Announce a download and create the progress display for it

    :param category_name: Title of the book or series being downloaded
    :param source_name: Name of the source the download comes from
    :param count: Number of books; more than one selects the plural message
    :returns: Progress object writing to the shared console
    """
    if count > 1:
        message = f"Downloading [yellow not bold]{count}[/] books in [blue]{category_name}[/] from [magenta]{source_name}[/]"
    else:
        message = f"Downloading [blue bold]{category_name}[/] from [magenta]{source_name}[/]"
    console.print(message)
    return Progress(
        SpinnerColumn(),
        "{task.description}",
        BarColumn(),
        "[progress.percentage]{task.percentage:>3.0f}%",
        console = console
    )
|
||||
|
||||
def add_book(progress: Progress, book: Book) -> TaskID:
    """
    Register a book as a task on a progress display

    :param progress: Progress object to add the task to
    :param book: Book whose title is used as task description
    :returns: Id of the new task, which has a total of 1
    """
    description = f"[blue]{book.metadata.title}[/]"
    return progress.add_task(description, total = 1)
|
||||
79
grawlix/output/__init__.py
Normal file
79
grawlix/output/__init__.py
Normal file
@ -0,0 +1,79 @@
|
||||
from grawlix.book import Book, BookData, SingleFile, ImageList, OnlineFile
|
||||
from grawlix.exceptions import GrawlixError
|
||||
|
||||
from .output_format import OutputFormat
|
||||
from .epub import Epub
|
||||
from .cbz import Cbz
|
||||
|
||||
from typing import Callable
|
||||
from pathlib import Path
|
||||
import os
|
||||
|
||||
def download_book(book: Book, update_func: Callable) -> None:
    """
    Download and write book to disk

    The output format is chosen from the type of the book data and the
    directory of the output location is created if needed.

    :param book: Book to download
    :param update_func: Callback handed to the output format for reporting progress
    :raises NotImplementedError: If the type of book data is not supported
    """
    output_format = get_default_format(book.data)
    location = format_output_location(book, output_format)
    # exist_ok removes the gap between checking for the directory and
    # creating it, during which it could appear and make makedirs fail
    os.makedirs(Path(location).parent, exist_ok=True)
    if isinstance(book.data, SingleFile):
        output_format.dl_single_file(book.data, location, update_func)
    elif isinstance(book.data, ImageList):
        output_format.dl_image_list(book.data, location, update_func)
    else:
        raise NotImplementedError
|
||||
|
||||
|
||||
def format_output_location(book: Book, output_format: OutputFormat) -> str:
    """
    Build the relative path a book is written to

    The book is placed in a directory named after its series, or in
    "UNKNOWN" when it has none, and named after its title with the
    extension of the output format.

    :param book: Book to download
    :param output_format: Output format of book
    :returns: Path to output location
    """
    directory = book.metadata.series or "UNKNOWN"
    filename = f"{book.metadata.title}.{output_format.extension}"
    return f"{directory}/{filename}"
|
||||
|
||||
|
||||
def get_default_format(bookdata: BookData) -> OutputFormat:
    """
    Pick the output format used when the user did not specify one

    A single file keeps the format given by its extension, while a list
    of images is stored as cbz.

    :param bookdata: Content of book
    :returns: OutputFormat object matching the default
    :raises GrawlixError: If the type of book data is unknown
    """
    if isinstance(bookdata, ImageList):
        return Cbz()
    if not isinstance(bookdata, SingleFile):
        raise GrawlixError
    return output_format_from_str(bookdata.file.extension)
|
||||
|
||||
|
||||
def output_format_from_str(name: str) -> OutputFormat:
    """
    Create the output format whose extension equals a string

    :param name: Name of output format
    :returns: New instance of the first matching output format
    :raises GrawlixError: If no output format has that extension
    """
    candidates = (
        format_class
        for format_class in get_output_formats()
        if format_class.extension == name
    )
    selected = next(candidates, None)
    if selected is None:
        raise GrawlixError
    return selected()
|
||||
|
||||
|
||||
def get_output_formats() -> list[type[OutputFormat]]:
    """
    List every output format grawlix can write

    :returns: List of available output format classes
    """
    available: list[type[OutputFormat]] = [Cbz, Epub]
    return available
|
||||
18
grawlix/output/cbz.py
Normal file
18
grawlix/output/cbz.py
Normal file
@ -0,0 +1,18 @@
|
||||
from .output_format import OutputFormat, Update
|
||||
from grawlix.book import ImageList
|
||||
|
||||
import zipfile
|
||||
|
||||
class Cbz(OutputFormat):
    """Comic book zip file"""

    extension: str = "cbz"

    def dl_image_list(self, book: ImageList, location: str, update: Update) -> None:
        """
        Download every image of the book and store them in one zip archive

        Images are named "Image <n>.<extension>" with n counting from zero.

        :param book: Images to download
        :param location: Path of the archive to write
        :param update: Optional callback receiving the share of the book each image makes up
        """
        image_count = len(book.images)
        with zipfile.ZipFile(location, mode="w") as archive:
            for index, image in enumerate(book.images):
                content = self._download_file(image)
                archive.writestr(f"Image {index}.{image.extension}", content)
                if update:
                    update(1/image_count)
|
||||
5
grawlix/output/epub.py
Normal file
5
grawlix/output/epub.py
Normal file
@ -0,0 +1,5 @@
|
||||
from grawlix.book import Book, SingleFile
|
||||
from .output_format import OutputFormat
|
||||
|
||||
class Epub(OutputFormat):
    """Epub output; only changes the extension of the base output format"""

    extension = "epub"
|
||||
68
grawlix/output/output_format.py
Normal file
68
grawlix/output/output_format.py
Normal file
@ -0,0 +1,68 @@
|
||||
from grawlix.book import Book, SingleFile, OnlineFile, ImageList
|
||||
from grawlix.exceptions import UnsupportedOutputFormat
|
||||
from grawlix.encryption import decrypt
|
||||
|
||||
import requests
|
||||
from typing import Callable, Optional
|
||||
|
||||
Update = Optional[Callable[[float], None]]
|
||||
|
||||
class OutputFormat:
    """
    Base class of all output formats

    Provides the shared download helpers. By default a single file is
    written as it is and image lists are rejected.
    """

    # Extension for output files
    extension: str = ""

    def __init__(self):
        self._session = requests.Session()


    def dl_single_file(self, book: SingleFile, location: str, update_func: Update) -> None:
        """
        Download and write an `grawlix.SingleFile` to disk

        :param book: Book to download
        :param location: Path to where the file is written
        :param update_func: Progress callback; not used by this implementation
        :raises UnsupportedOutputFormat: If datatype is not supported by format
        """
        if book.file.extension != self.extension:
            raise UnsupportedOutputFormat
        self._download_and_write_file(book.file, location)


    def dl_image_list(self, book: ImageList, location: str, update_func: Update) -> None:
        """
        Download and write an `grawlix.ImageList` to disk

        :param book: Book to download
        :param location: Path to where the file is written
        :param update_func: Progress callback
        :raises UnsupportedOutputFormat: If datatype is not supported by format
        """
        raise UnsupportedOutputFormat


    def _download_file(self, file: OnlineFile) -> bytes:
        """
        Download `grawlix.OnlineFile` and decrypt it if it is encrypted

        :param file: File to download
        :returns: Content of downloaded file
        """
        response = self._session.get(file.url, headers = file.headers)
        if file.encryption is None:
            return response.content
        return decrypt(response.content, file.encryption)


    def _download_and_write_file(self, file: OnlineFile, location: str) -> None:
        """
        Download `grawlix.OnlineFile` and write the content to disk

        :param file: File to download
        :param location: Path to where the file is written
        """
        content = self._download_file(file)
        with open(location, "wb") as output:
            output.write(content)
|
||||
38
grawlix/sources/__init__.py
Normal file
38
grawlix/sources/__init__.py
Normal file
@ -0,0 +1,38 @@
|
||||
from grawlix.exceptions import NoSourceFound
|
||||
|
||||
from .source import Source
|
||||
from .flipp import Flipp
|
||||
from .mangaplus import MangaPlus
|
||||
from .saxo import Saxo
|
||||
from .webtoons import Webtoons
|
||||
|
||||
import re
|
||||
|
||||
|
||||
def find_source(url: str) -> Source:
    """
    Create the first source with a pattern matching the url

    :param url: Url of book to download
    :returns: Source for downloading url
    :raises NoSourceFound: If no source matches the url
    """
    for source_class in get_source_classes():
        if any(re.match(pattern, url) for pattern in source_class.match):
            return source_class()
    raise NoSourceFound
|
||||
|
||||
|
||||
def get_source_classes() -> list[type[Source]]:
    """
    List every source grawlix can download from

    :returns: A list of all available source types
    """
    available: list[type[Source]] = [Flipp, MangaPlus, Saxo, Webtoons]
    return available
|
||||
172
grawlix/sources/flipp.py
Normal file
172
grawlix/sources/flipp.py
Normal file
@ -0,0 +1,172 @@
|
||||
from .source import Source
|
||||
from grawlix.book import Book, Metadata, ImageList, OnlineFile, Series, Result
|
||||
from grawlix.exceptions import InvalidUrl, DataNotFound
|
||||
from grawlix.utils import get_arg_from_url
|
||||
|
||||
import re
|
||||
from urllib.parse import urlparse
|
||||
from typing import Tuple, Optional
|
||||
|
||||
BASEURL = "https://reader.flipp.dk/html5/reader"
|
||||
|
||||
class Flipp(Source):
    """
    Source for downloading magazines from Flipp

    Supports links to a single issue in the reader as well as links to a
    whole publication in the web app.
    """

    name: str = "Flipp"
    match = [
        r"https?://reader.flipp.dk/html5/reader/production/default.aspx\?pubname=&edid=([^/]+)",
        r"https?://magasiner.flipp.dk/flipp/web-app/#/publications/.+"
    ]
    _authentication_methods: list[str] = []
    # Response of the signin request, kept after the first download
    _login_cache: Optional[dict] = None

    def download(self, url: str) -> Result:
        """
        Download an issue or a series depending on the kind of url

        :param url: Url of issue or series
        :returns: Book for an issue url, Series for a publication url
        :raises InvalidUrl: If the url matches neither pattern
        """
        if re.match(self.match[0], url):
            eid = self._get_eid(url)
            publication_id = self._get_series_id(eid)
            return self._download_book(eid, publication_id)
        elif re.match(self.match[1], url):
            return self._download_series(url)
        raise InvalidUrl


    def download_book_from_id(self, book_id: Tuple[str, str]) -> Book:
        """
        Download an issue from an id produced by `_download_series`

        :param book_id: Tuple of series id and issue id
        :returns: Book metadata
        """
        series_id, issue_id = book_id
        return self._download_book(issue_id, series_id)


    def _download_series(self, url: str) -> Series:
        """
        Download series with book ids from Flipp

        :param url: Url of series
        :returns: Series object
        """
        # The series id is the last path segment of the url
        series_id = url.split("/")[-1]
        login_info = self._download_login_info()
        series_metadata = self._extract_series_data(login_info, series_id)
        issues = [
            (series_id, issue["customIssueCode"])
            for issue in series_metadata["issues"]
        ]
        return Series(
            title = series_metadata["name"],
            book_ids = issues
        )


    def _download_login_info(self) -> dict:
        """
        Download login info from Flipp
        Will use cache if available

        :returns: Login info
        """
        if self._login_cache:
            return self._login_cache
        login_info = self._session.post(
            "https://flippapi.egmontservice.com/api/signin",
            headers = {
                "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:111.0) Gecko/20100101 Firefox/111.0"
            },
            json = {
                "email": "",
                "password": "",
                "token": "",
                "languageCulture": "da-DK",
                "appId": "",
                "appVersion": "",
                "uuid": "",
                "os": ""
            }
        ).json()
        # Must be stored under the name the cache check above reads
        self._login_cache = login_info
        return login_info


    def _extract_series_data(self, response: dict, series_id: str) -> dict:
        """
        Extract metadata about series from login response

        :param response: Login response from Flipp
        :param series_id: Id of series
        :returns: Metadata about series
        :raises DataNotFound: If no publication has the given id
        """
        for publication in response["publications"]:
            if publication["customPublicationCode"] == series_id:
                return publication
        raise DataNotFound


    def _download_book(self, issue_id: str, series_id: str) -> Book:
        """
        Download book from Flipp

        :param issue_id: Issue identifier
        :param series_id: Series identifier
        :returns: Book metadata
        """
        pages = self._get_pages(issue_id, series_id)
        metadata = self._get_metadata(issue_id, series_id)
        return Book(
            data = ImageList(pages),
            metadata = Metadata(
                title = f"{metadata['series_name']} {metadata['issueName']}",
                series = metadata["series_name"],
                identifier = issue_id
            ),
        )


    def _get_metadata(self, issue_id: str, series_id: str) -> dict:
        """
        Download and extract issue data

        :param issue_id: Issue id
        :param series_id: Series id
        :returns: Issue metadata with the name of the series added as `series_name`
        :raises DataNotFound: If the series or the issue is not found
        """
        login_info = self._download_login_info()
        series_metadata = self._extract_series_data(login_info, series_id)
        for issue in series_metadata["issues"]:
            if issue["customIssueCode"] == issue_id:
                issue["series_name"] = series_metadata["name"]
                return issue
        raise DataNotFound

    def _get_eid(self, url: str) -> str:
        """
        Extract the issue id from the `edid` argument of a reader url

        :param url: Url of issue
        :returns: Issue id
        """
        return get_arg_from_url(url, "edid")


    def _get_series_id(self, issue_id: str) -> str:
        """
        Download series id from issue id

        :param issue_id: Issue id
        :returns: Series id
        :raises DataNotFound: If the reader page does not contain the id
        """
        response = self._session.get(f"{BASEURL}/production/default.aspx?pubname=&edid={issue_id}")
        # TODO Make faster
        search = re.search(r'publicationguid = "([^"]+)', response.text)
        if search is None:
            raise DataNotFound
        return search.group(1)


    def _get_pages(self, issue_id: str, series_id: str) -> list[OnlineFile]:
        """
        Download page metadata for book

        :param issue_id: Issue id
        :param series_id: Series id
        :return: Page image links
        """
        response = self._session.get(
            f"{BASEURL}/get_page_groups_from_eid.aspx?pubid={series_id}&eid={issue_id}",
        )
        result = []
        for page in response.json()["pageGroups"]:
            # Find image id in low quality image url: the path without its
            # leading slash and its last nine characters
            low_quality_url = urlparse(page["pages"][0]["image"])
            image_id = low_quality_url.path[1:-9]
            high_quality_url = f"http://pages.cdn.pagesuite.com/{image_id}/highpage.jpg?method=true"
            result.append(OnlineFile(high_quality_url, "jpg"))
        return result
|
||||
102
grawlix/sources/mangaplus.py
Normal file
102
grawlix/sources/mangaplus.py
Normal file
@ -0,0 +1,102 @@
|
||||
from .source import Source
|
||||
from grawlix.encryption import XOrEncryption
|
||||
from grawlix.book import Book, Metadata, ImageList, OnlineFile, Series, Result
|
||||
from grawlix.exceptions import InvalidUrl
|
||||
|
||||
import re
|
||||
import blackboxprotobuf
|
||||
import json
|
||||
import rich
|
||||
|
||||
class MangaPlus(Source):
    """
    Source for downloading manga from Manga Plus

    Supports links to a single chapter in the viewer as well as links to
    a whole title.
    """

    name: str = "Manga Plus"
    match = [
        r"https?://mangaplus.shueisha.co.jp/viewer/\d+",
        r"https?://mangaplus.shueisha.co.jp/titles/\d+"
    ]
    _authentication_methods: list[str] = []


    def download(self, url: str) -> Result:
        """
        Download an issue or a series depending on the kind of url

        :param url: Url of issue or series
        :returns: Book for a viewer url, Series for a title url
        :raises InvalidUrl: If the url matches neither pattern
        """
        if re.match(self.match[0], url):
            issue_id = url.split('/')[-1]
            return self._download_issue(issue_id)
        if re.match(self.match[1], url):
            series_id = url.split("/")[-1]
            return self._download_series(series_id)
        raise InvalidUrl


    def download_book_from_id(self, book_id: str) -> Book:
        """
        Download an issue from its id

        :param book_id: Identifier for issue
        :returns: Issue metadata
        """
        return self._download_issue(book_id)


    def _download_series(self, series_id: str) -> Series:
        """
        Download series from Manga Plus

        The response is protobuf without a schema, so fields are addressed
        by their field numbers.

        :param series_id: Identifier for series
        :returns: Series data
        """
        content = self._session.get(
            "https://jumpg-api.tokyo-cdn.com/api/title_detailV2",
            params = {
                "title_id": series_id,
                "lang": "eng",
                "os": "android",
                "os_ver": "32",
                "app_ver": "40",
                "secret": "2afb69fbb05f57a1856cf75e1c4b6ee6"
            },
        ).content
        data, _ = blackboxprotobuf.protobuf_to_json(content)
        parsed = json.loads(data)
        title = parsed["1"]["8"]["1"]["2"]
        issues = []

        def add_issues(data: dict, main: str):
            # A field holds a list when it is repeated and a single
            # object otherwise
            if main in data:
                x = data[main]
                if isinstance(x, list):
                    for i in x:
                        issues.append(i["2"])
                else:
                    issues.append(x["2"])

        for a in parsed["1"]["8"]["28"]:
            add_issues(a, "2")
            add_issues(a, "3")
            add_issues(a, "4")
        return Series(
            title,
            book_ids = issues
        )

    def _download_issue(self, issue_id: str) -> Book:
        """
        Download issue from Manga Plus

        :param issue_id: Identifier for issue
        :returns: Issue metadata
        :raises DataNotFound: If the response contains no title for the issue
        """
        url = f"https://jumpg-webapi.tokyo-cdn.com/api/manga_viewer?chapter_id={issue_id}&split=yes&img_quality=super_high"
        content = self._session.get(url).content
        response, _ = blackboxprotobuf.protobuf_to_json(content)
        images = []
        # Stays None when no entry carries the title
        title = None
        parsed = json.loads(response)
        for image in parsed["1"]["10"]["1"]:
            if "1" in image:
                # Image entry: url in field 1, hex encoded xor key in field 5
                images.append(
                    OnlineFile(
                        image["1"]["1"],
                        extension = "jpg",
                        encryption = XOrEncryption(bytes.fromhex(image["1"]["5"]))
                    )
                )
            elif "3" in image:
                title = image["3"]["1"]["4"]
        if title is None:
            # Imported here because the module only imports InvalidUrl
            from grawlix.exceptions import DataNotFound
            raise DataNotFound
        return Book(
            data = ImageList(images),
            metadata = Metadata(
                title,
                series = parsed["1"]["10"]["5"]
            )
        )
|
||||
119
grawlix/sources/saxo.py
Normal file
119
grawlix/sources/saxo.py
Normal file
@ -0,0 +1,119 @@
|
||||
from grawlix.book import Book, Metadata, SingleFile, OnlineFile
|
||||
from grawlix import AESEncryption
|
||||
|
||||
import re
|
||||
from .source import Source
|
||||
|
||||
class Saxo(Source):
    """
    Source for downloading ebooks from Saxo

    Requires login with username and password before books can be
    downloaded.
    """

    name: str = "Saxo"
    match = [
        r"https://(www.)?saxo.(com|dk)/[^/]+/.+\d+$"
    ]
    _authentication_methods = [ "login" ]
    # Id of the logged in user; set by `login`
    user_id: str

    def login(self, username: str, password: str, **kwargs) -> None:
        """
        Login to Saxo and store the token and user id for later requests

        :param username: Username of user for Saxo
        :param password: Password of user for Saxo
        """
        response = self._session.post(
            "https://auth-read.saxo.com/auth/token",
            data = {
                "username": username,
                "password": password,
                "grant_type": "password",
            },
            headers = {
                "Content-Type": "application/x-www-form-urlencoded"
            }
        )
        login_data = response.json()
        bearer_token = login_data["access_token"]
        # Replaces all headers of the session, including the defaults
        self._session.headers = {
            "Appauthorization": f"bearer {bearer_token}",
            "App-Os": "android",
            "App-Version": "6.2.4"
        }
        self.user_id = login_data["id"]


    def download(self, url: str) -> Book:
        """
        Download book metadata from Saxo

        :param url: Url of book to download
        :returns: Book metadata
        """
        isbn = self._extract_isbn_from_url(url)
        book_id = self._get_book_id(isbn)
        metadata = self._get_book_metadata(book_id)
        ebook_id = metadata["id"] # Id of ebook file
        return Book(
            metadata = self._extract_metadata(metadata),
            data = SingleFile(
                OnlineFile(
                    url = self._get_book_file_link(ebook_id),
                    extension = "epub",
                    # Encryption keys extracted from app
                    encryption = AESEncryption(
                        key = b"CD3E9D141D8EFC0886912E7A8F3652C4",
                        iv = b"78CB354D377772F1"
                    )
                )
            )
        )


    def _get_book_id(self, isbn: str) -> str:
        """
        Download internal book id of book from isbn

        :param isbn: Isbn of book
        :returns: Saxo internal book id
        """
        response = self._session.get(
            f"https://api-read.saxo.com/api/v2/search/user/{self.user_id}/premium/books/{isbn}"
        )
        return response.json()["items"][0]["bookId"]


    def _get_book_metadata(self, book_id: str) -> dict:
        """
        Download metadata of book

        :param book_id: Id of book
        :returns: Metadata of the first ebook of the book
        """
        response = self._session.get(
            f"https://api-read.saxo.com/api/v2/book/{book_id}/user/{self.user_id}/details"
        )
        return response.json()["ebooks"][0]


    def _get_book_file_link(self, ebook_id: str) -> str:
        """
        Download link to epub file

        :param ebook_id: Id of ebook file
        :returns: Link to ebook file
        """
        response = self._session.get(
            f"https://api-read.saxo.com/api/v1/book/{ebook_id}/content/encryptedstream/"
        )
        return response.json()["link"]


    @staticmethod
    def _extract_metadata(metadata: dict) -> Metadata:
        """
        Extract metadata from metadata response from Saxo

        :param metadata: Metadata response from saxo
        :returns: Metadata formatted as `grawlix.Metadata`
        """
        return Metadata(metadata["title"])


    @staticmethod
    def _extract_isbn_from_url(url: str) -> str:
        """
        Extracts isbn from url

        The isbn is the run of digits the url ends with.

        :param url: Url of book
        :returns: Isbn of book
        :raises InvalidUrl: If the url does not end with digits
        """
        # Raw string: "\d" is not a valid escape in a normal string literal
        isbn_match = re.search(r"\d+$", url)
        if isbn_match and isbn_match.group():
            return isbn_match.group()
        # Imported here because the module does not import the exception
        from grawlix.exceptions import InvalidUrl
        raise InvalidUrl
|
||||
60
grawlix/sources/source.py
Normal file
60
grawlix/sources/source.py
Normal file
@ -0,0 +1,60 @@
|
||||
from grawlix.book import Book, Series, Result
|
||||
|
||||
from typing import Generic, TypeVar, Tuple
|
||||
import requests
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
class Source(Generic[T]):
    """
    Base class of all sources books can be downloaded from

    Subclasses set `name`, the url patterns in `match` and the supported
    `_authentication_methods`, and override the download methods.
    """

    name: str = "UNKNOWN"
    match: list[str] = []
    _authentication_methods: list[str] = []

    def __init__(self):
        self._session = requests.Session()


    @property
    def requires_authentication(self) -> bool:
        """Whether the source lists at least one authentication method"""
        return bool(self._authentication_methods)


    @property
    def supports_login(self) -> bool:
        """Whether the source can authenticate with username and password"""
        return "login" in self._authentication_methods


    def login(self, username: str, password: str, **kwargs: str):
        """
        Authenticate with username and password

        :param username: Username of user for source
        :param password: Password of user for source
        :raises NotImplementedError: Unless overridden by the source
        """
        raise NotImplementedError


    def download(self, url: str) -> Result[T]:
        """
        Download metadata of the book or series behind a url

        :param url: Url of book to download
        :returns: Book metadata
        :raises NotImplementedError: Unless overridden by the source
        """
        raise NotImplementedError


    def download_book_from_id(self, book_id: T) -> Book:
        """
        Download metadata of a book from its id

        :param book_id: Internal id of book
        :returns: Downloaded book metadata
        :raises NotImplementedError: Unless overridden by the source
        """
        raise NotImplementedError
|
||||
92
grawlix/sources/webtoons.py
Normal file
92
grawlix/sources/webtoons.py
Normal file
@ -0,0 +1,92 @@
|
||||
from .source import Source
|
||||
from grawlix.book import Book, Metadata, ImageList, OnlineFile, Series, Result
|
||||
from grawlix.utils import get_arg_from_url
|
||||
from grawlix.exceptions import InvalidUrl
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
from urllib.parse import urlparse
|
||||
|
||||
USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:111.0) Gecko/20100101 Firefox/111.0"
|
||||
MOBILE_USER_AGENT = "Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Mobile/15E148 Safari/604.1"
|
||||
|
||||
class Webtoons(Source[str]):
    """
    Source for comics published on webtoons.com

    Handles single episodes (viewer urls) and whole series (list urls).
    No authentication methods are declared for this source.
    """

    name: str = "Webtoons"
    # Order matters: `download` treats match[0] as the episode pattern and
    # match[1] as the series pattern. The dots of the host name are escaped
    # so they only match a literal "." instead of any character.
    match = [
        r"https://www\.webtoons\.com/../.+/.+/.+/viewer\?title_no=\d+&episode_no=\d+",
        r"https://www\.webtoons\.com/../.+/.+/list\?title_no=\d+"
    ]
    _authentication_methods: list[str] = []

    def download(self, url: str) -> Result[str]:
        """
        Download an episode or a series depending on the kind of url

        :param url: Url of a webtoons episode or series
        :returns: Episode as a book, or series with the urls of its episodes
        :raises InvalidUrl: If url matches none of the supported patterns
        """
        if re.match(self.match[0], url):
            return self._download_episode(url)
        if re.match(self.match[1], url):
            return self._download_series(url)
        raise InvalidUrl


    def download_book_from_id(self, book_id: str) -> Book:
        """
        Download a single episode

        :param book_id: Url of the episode (series store episode urls as ids)
        :returns: Episode
        """
        return self._download_episode(book_id)


    @staticmethod
    def _require(element):
        """
        Return element unchanged, failing clearly if the page lacked it

        :param element: Result of a BeautifulSoup lookup
        :returns: The same element
        :raises DataNotFound: If the lookup returned None
        """
        if element is None:
            # Imported here so the class does not rely on a module level
            # import that the file does not have.
            from grawlix.exceptions import DataNotFound
            raise DataNotFound
        return element


    def _download_series(self, url: str) -> Series[str]:
        """
        Download a series of webtoons

        :param url: Url of series
        :returns: Webtoons series data
        :raises DataNotFound: If the title or an episode link is missing
        """
        parsed_url = urlparse(url)
        # The series page is requested from the mobile host with a mobile
        # user agent; the cookies answer the consent prompts up front.
        page = self._session.get(
            f"https://m.webtoons.com{parsed_url.path}",
            params = parsed_url.query,
            headers = {
                "User-Agent": MOBILE_USER_AGENT,
            },
            cookies = {
                "needGDPR": "FALSE",
                "needCCPA": "FALSE",
                "needCOPPA": "FALSE"
            }
        ).text
        soup = BeautifulSoup(page, "lxml")
        title = self._require(soup.find("meta", property="og:title")).get("content")
        episodes = [
            self._require(episode.find("a")).get("href")
            for episode in soup.find_all("li", class_="_episodeItem")
        ]
        return Series(
            title,
            book_ids = episodes
        )


    def _download_episode(self, url: str) -> Book:
        """
        Download single webtoon episode

        :param url: Url of episode
        :returns: Episode
        :raises DataNotFound: If title, series name or image area is missing
        """
        page = self._session.get(url).text
        soup = BeautifulSoup(page, "lxml")
        title = self._require(soup.find("h1", class_="subj_episode")).get("title")
        series_info = self._require(soup.find("div", class_="subj_info"))
        series = self._require(series_info.find("a")).get("title")
        viewer = self._require(
            soup.find("div", class_="viewer_img _img_viewer_area")
        )
        images = [
            OnlineFile(
                url = image.get("data-url"),
                extension = "png",
                # Sent along with every image request
                headers = { "Referer": "https://www.webtoons.com/" }
            )
            for image in viewer.find_all("img")
        ]
        return Book(
            data = ImageList(images),
            metadata = Metadata(
                title,
                series = series
            )
        )
|
||||
11
grawlix/utils/__init__.py
Normal file
11
grawlix/utils/__init__.py
Normal file
@ -0,0 +1,11 @@
|
||||
from grawlix.exceptions import DataNotFound
|
||||
|
||||
from urllib.parse import urlparse, parse_qs
|
||||
|
||||
def get_arg_from_url(url: str, key: str) -> str:
    """
    Extract the first value of a query string argument from a url

    :param url: Url to read the query string from
    :param key: Name of the query argument
    :returns: First value given for key
    :raises DataNotFound: If the url has no value for key
    """
    query = parse_qs(urlparse(url).query)
    try:
        return query[key][0]
    # Only a failed lookup means the argument is missing; a bare except
    # would also swallow KeyboardInterrupt and unrelated errors.
    except (KeyError, IndexError) as error:
        raise DataNotFound from error
|
||||
49
pyproject.toml
Normal file
49
pyproject.toml
Normal file
@ -0,0 +1,49 @@
|
||||
[project]
name = "grawlix"
authors = [
    { name = "Joakim Holm", email = "mail@joakimholm.xyz" }
]
description = "CLI tool for downloading ebooks"
readme = "README.md"
keywords = ["ebook", "cli", "downloader"]
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
]
dependencies = [
    "appdirs",
    "beautifulsoup4",
    "blackboxprotobuf",
    "importlib-resources",
    "lxml",
    "pycryptodome",
    "requests",
    "rich",
    "tomli"
]
# The version is read from the package, see [tool.setuptools.dynamic]
dynamic = ["version"]

[project.urls]
"Homepage" = "https://github.com/jo1gi/grawlix"
"Bugtracker" = "https://github.com/jo1gi/grawlix/issues"

[project.scripts]
# The installed command carries the name of this project
grawlix = "grawlix.__main__:main"


[build-system]
requires = ["setuptools", "setuptools-scm"]
build-backend = "setuptools.build_meta"


[tool.setuptools.dynamic]
version = {attr = "grawlix.__version__"}

[tool.setuptools.package-data]
# The key must be the name of the package that contains the data files
grawlix = ["*.txt"]


[tool.mypy]
ignore_missing_imports = true
allow_untyped_globals = false
disallow_untyped_calls = true
|
||||
49
shell.nix
Normal file
49
shell.nix
Normal file
@ -0,0 +1,49 @@
|
||||
# Development shell with the runtime, test and build dependencies of grawlix
with import <nixpkgs> {};

let
  # Built here from PyPI instead of being taken from python3Packages
  blackboxprotobuf = python3Packages.buildPythonPackage rec {
    pname = "blackboxprotobuf";
    version = "1.0.1";

    src = python3Packages.fetchPypi {
      inherit pname version;
      sha256 = "sha256-IztxTmwkzp0cILhxRioiCvkXfk/sAcG3l6xauGoeHOo=";
    };

    propagatedBuildInputs = with python3Packages; [
      protobuf
    ];

    # Drop the exact protobuf pin so the protobuf from nixpkgs is accepted.
    # The file is edited in place: redirecting sed's output onto its own
    # input file would truncate the file before sed gets to read it.
    patchPhase = ''
      sed -i 's/protobuf==3.10.0/protobuf/' requirements.txt
    '';

    doCheck = false;
  };
in
mkShell {
  buildInputs = [
    (python3.withPackages(ps: with ps; [
      appdirs
      beautifulsoup4
      blackboxprotobuf
      importlib-resources
      lxml
      pycryptodome
      requests
      rich
      tomli

      # Test
      pytest
      mypy
      types-requests
      types-setuptools

      # Build
      build
      setuptools
      twine
    ]))
  ];
}
|
||||
Loading…
Reference in New Issue
Block a user