Joakim Holm 2023-04-06 21:38:19 +02:00
parent 7bafe32aac
commit d386cdcd88
22 changed files with 1174 additions and 0 deletions

grawlix/__init__.py Normal file
@@ -0,0 +1,4 @@
from .encryption import Encryption, AESEncryption
from .book import Book, SingleFile, OnlineFile, Metadata
__version__ = "0.1.0"

grawlix/__main__.py Normal file
@@ -0,0 +1,79 @@
from .book import Book, Series
from .config import load_config, Config, SourceConfig
from .exceptions import SourceNotAuthenticated
from .sources import find_source, Source
from .output import download_book
from . import arguments, logging
from typing import Tuple
from rich.progress import Progress
from functools import partial
def get_login(source: Source, config: Config, options) -> Tuple[str, str]:
"""
Get login credentials for source
:param source: Source to authenticate
:param config: Content of config file
:param options: Command line options
:returns: Login credentials
"""
source_name = source.name.lower()
if source_name in config.sources:
username = config.sources[source_name].username or options.username
password = config.sources[source_name].password or options.password
else:
username = options.username
password = options.password
return username, password
def authenticate(source: Source, config: Config, options):
"""
Authenticate with source
:param source: Source to authenticate
:param config: Content of config file
:param options: Command line options
"""
if source.supports_login:
username, password = get_login(source, config, options)
source.login(username, password)
else:
raise SourceNotAuthenticated
def main() -> None:
args = arguments.parse_arguments()
config = load_config()
for url in args.urls:
source: Source = find_source(url)
if source.requires_authentication:
authenticate(source, config, args)
result = source.download(url)
if isinstance(result, Book):
with logging.progress(result.metadata.title, source.name) as progress:
download_with_progress(result, progress)
elif isinstance(result, Series):
with logging.progress(result.title, source.name, len(result.book_ids)) as progress:
for book_id in result.book_ids:
book = source.download_book_from_id(book_id)
download_with_progress(book, progress)
def download_with_progress(book: Book, progress: Progress):
"""
Download book with progress bar in cli
:param book: Book to download
:param progress: Progress object
"""
task = logging.add_book(progress, book)
update_function = partial(progress.advance, task)
download_book(book, update_function)
progress.advance(task, 1)
if __name__ == "__main__":
main()
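The same pipeline that main() drives can also be exercised programmatically. A minimal sketch, assuming a hypothetical URL and skipping the progress bar:

from grawlix.book import Book, Series
from grawlix.output import download_book
from grawlix.sources import find_source

# Hypothetical URL; any link matched by a source's patterns works
url = "https://www.webtoons.com/en/fantasy/example/list?title_no=1"
source = find_source(url)
result = source.download(url)
if isinstance(result, Book):
    download_book(result, lambda _: None)  # no-op progress callback
elif isinstance(result, Series):
    for book_id in result.book_ids:
        download_book(source.download_book_from_id(book_id), lambda _: None)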

grawlix/arguments.py Normal file
@@ -0,0 +1,37 @@
from grawlix import __version__
import argparse
def parse_arguments():
# Help
parser = argparse.ArgumentParser(
prog = "grawlix",
description = "Download ebooks"
)
parser.add_argument(
'-v',
'--version',
action = "version",
version = f"grawlix {__version__}"
)
# Basics
parser.add_argument(
'urls',
help = "Links to ebooks",
nargs = "*"
)
# Authentication
parser.add_argument(
'-u',
'--username',
help = "Username for login",
dest = "username",
)
parser.add_argument(
'-p',
'--password',
help = "Password for login",
dest = "password",
)
# Outputs
return parser.parse_args()
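A quick sanity check of the resulting namespace (the URL is hypothetical):

import sys
from grawlix import arguments

sys.argv = ["grawlix", "-u", "alice", "-p", "secret", "https://example.com/book/1"]
args = arguments.parse_arguments()
assert args.username == "alice"
assert args.password == "secret"
assert args.urls == ["https://example.com/book/1"]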

grawlix/book.py Normal file
@@ -0,0 +1,59 @@
from grawlix import Encryption
from dataclasses import dataclass
from typing import Optional, Union, TypeVar, Generic
@dataclass(slots=True)
class Metadata:
"""Metadata about a book"""
title: str
series: Optional[str] = None
publisher: Optional[str] = None
identifier: Optional[str] = None
@dataclass(slots=True)
class OnlineFile:
"""Instructions for downloading an online file"""
url: str
extension: str
encryption: Optional[Encryption] = None
headers: Optional[dict[str, str]] = None
@dataclass(slots=True)
class SingleFile:
"""Bookdata in the form of a single file"""
file: OnlineFile
@dataclass(slots=True)
class ImageList:
"""
List of images
Mostly used for comic books
"""
images: list[OnlineFile]
BookData = Union[
SingleFile,
ImageList
]
@dataclass(slots=True)
class Book:
"""Stores information about a book"""
metadata: Metadata
data: BookData
T = TypeVar("T")
@dataclass(slots=True)
class Series(Generic[T]):
"""Stores a series of books"""
title: str
book_ids: list[T]
Result = Union[
Book,
Series[T]
]
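A small construction example with made-up values, showing how the dataclasses compose:

from grawlix.book import Book, Metadata, OnlineFile, SingleFile

book = Book(
    metadata = Metadata(title="Example Book", series="Example Series"),
    data = SingleFile(
        OnlineFile(url="https://example.com/book.epub", extension="epub")
    ),
)
assert isinstance(book.data, SingleFile)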

grawlix/config.py Normal file
@@ -0,0 +1,41 @@
from dataclasses import dataclass
from typing import Optional
import tomli
import appdirs
import os
@dataclass(slots=True)
class SourceConfig:
"""Stores configuration for source"""
username: Optional[str]
password: Optional[str]
@dataclass(slots=True)
class Config:
"""Grawlix configuration"""
sources: dict[str, SourceConfig]
def load_config() -> Config:
"""
Load config from disk
:returns: Config object
"""
config_dir = appdirs.user_config_dir("grawlix", "jo1gi")
config_file = os.path.join(config_dir, "grawlix.toml")
if os.path.exists(config_file):
with open(config_file, "rb") as f:
config_dict = tomli.load(f)
else:
config_dict = {}
sources = {}
if "source" in config_dict:
for key, values in config_dict["source"].items():
sources[key] = SourceConfig(
username = values.get("username"),
password = values.get("password"),
)
return Config(sources)
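load_config() looks for per-source tables under [source]; a sketch of the TOML layout it accepts, where the saxo key and the credentials are assumptions (key names inferred from the code above):

import tomli

sample = """
[source.saxo]
username = "user@example.com"
password = "hunter2"
"""
config_dict = tomli.loads(sample)
assert config_dict["source"]["saxo"]["username"] == "user@example.com"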

grawlix/encryption.py Normal file
@@ -0,0 +1,38 @@
from Crypto.Cipher import AES
from typing import Union
from dataclasses import dataclass
@dataclass(slots=True)
class AESEncryption:
key: bytes
iv: bytes
@dataclass(slots=True)
class XOrEncryption:
key: bytes
Encryption = Union[
AESEncryption,
XOrEncryption
]
def decrypt(data: bytes, encryption: Encryption) -> bytes:
"""
Decrypt data with specified encryption algorithm
:param data: Bytes to decrypt
:param encryption: Information about how to decrypt
:returns: Decrypted data
"""
if isinstance(encryption, AESEncryption):
cipher = AES.new(encryption.key, AES.MODE_CBC, encryption.iv)
return cipher.decrypt(data)
if isinstance(encryption, XOrEncryption):
key_length = len(encryption.key)
decoded = []
for i in range(0, len(data)):
decoded.append(data[i] ^ encryption.key[i % key_length])
return bytes(decoded)
raise NotImplementedError
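Since XOR is its own inverse, decrypt() with an XOrEncryption also serves as the encryption step; a round-trip sketch:

from grawlix.encryption import XOrEncryption, decrypt

key = XOrEncryption(b"\x42\x13")
plaintext = b"attack at dawn"
ciphertext = decrypt(plaintext, key)
assert decrypt(ciphertext, key) == plaintext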

grawlix/exceptions.py Normal file
@@ -0,0 +1,20 @@
class GrawlixError(Exception):
pass
class DataNotFound(GrawlixError):
pass
class InvalidUrl(GrawlixError):
pass
class UnsupportedOutputFormat(GrawlixError):
pass
class NoSourceFound(GrawlixError):
pass
class SourceNotAuthenticated(GrawlixError):
pass
class MissingArgument(GrawlixError):
pass

grawlix/logging.py Normal file
@@ -0,0 +1,31 @@
from grawlix.book import Book
from rich.console import Console
from rich.progress import Progress, BarColumn, ProgressColumn, TaskID, SpinnerColumn
import rich
from typing import Union
from dataclasses import dataclass
console = Console(stderr=True)
def progress(category_name: str, source_name: str, count=1) -> Progress:
if count > 1:
console.print(f"Downloading [yellow not bold]{count}[/] books in [blue]{category_name}[/] from [magenta]{source_name}[/]")
else:
console.print(f"Downloading [blue bold]{category_name}[/] from [magenta]{source_name}[/]")
progress = Progress(
SpinnerColumn(),
"{task.description}",
BarColumn(),
"[progress.percentage]{task.percentage:>3.0f}%",
console = console
)
return progress
def add_book(progress: Progress, book: Book) -> TaskID:
task = progress.add_task(
f"[blue]{book.metadata.title}[/]",
total = 1
)
return task
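Intended usage mirrors __main__: open the progress context, register each book as a task, and advance it. A minimal sketch with a made-up book:

from grawlix import logging
from grawlix.book import Book, Metadata, OnlineFile, SingleFile

book = Book(
    Metadata(title="Example"),
    SingleFile(OnlineFile("https://example.com/x.epub", "epub")),
)
with logging.progress("Example", "SomeSource") as progress:
    task = logging.add_book(progress, book)
    progress.advance(task, 1)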

grawlix/output/__init__.py Normal file
@@ -0,0 +1,79 @@
from grawlix.book import Book, BookData, SingleFile, ImageList, OnlineFile
from grawlix.exceptions import GrawlixError
from .output_format import OutputFormat
from .epub import Epub
from .cbz import Cbz
from typing import Callable
from pathlib import Path
import os
def download_book(book: Book, update_func: Callable) -> None:
"""
Download and write book to disk
:param book: Book to download
"""
output_format = get_default_format(book.data)
location = format_output_location(book, output_format)
parent = Path(location).parent
if not parent.exists():
os.makedirs(parent)
if isinstance(book.data, SingleFile):
output_format.dl_single_file(book.data, location, update_func)
elif isinstance(book.data, ImageList):
output_format.dl_image_list(book.data, location, update_func)
else:
raise NotImplementedError
def format_output_location(book: Book, output_format: OutputFormat) -> str:
"""
Create path to output location of book
:param book: Book to download
:param output_format: Output format of book
"""
series = book.metadata.series or "UNKNOWN"
return f"{series}/{book.metadata.title}.{output_format.extension}"
def get_default_format(bookdata: BookData) -> OutputFormat:
"""
Get default output format for bookdata.
Should only be used if no format was specified by the user
:param bookdata: Content of book
:returns: OutputFormat object matching the default
"""
if isinstance(bookdata, SingleFile):
return output_format_from_str(bookdata.file.extension)
if isinstance(bookdata, ImageList):
return Cbz()
raise GrawlixError
def output_format_from_str(name: str) -> OutputFormat:
"""
Convert string to outputformat object
:param name: Name of output format
:returns: OutputFormat object
"""
for output_format in get_output_formats():
if output_format.extension == name:
return output_format()
raise GrawlixError
def get_output_formats() -> list[type[OutputFormat]]:
"""
Get a list of all available output formats
:returns: List of available output format classes
"""
return [
Cbz,
Epub,
]
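Tying the helpers together: download_book() picks a format from the data type, builds a "Series/Title.ext" path, and writes the file. A sketch with a hypothetical URL:

from grawlix.book import Book, Metadata, OnlineFile, SingleFile
from grawlix.output import download_book

book = Book(
    Metadata(title="Example", series="Demo"),
    SingleFile(OnlineFile("https://example.com/example.epub", "epub")),
)
# Progress callback; formats that report progress call it with a fraction
download_book(book, lambda done: print(f"{done:.0%}"))
# Writes to "Demo/Example.epub" per format_output_location()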

grawlix/output/cbz.py Normal file
@@ -0,0 +1,18 @@
from .output_format import OutputFormat, Update
from grawlix.book import ImageList
import zipfile
class Cbz(OutputFormat):
"""Comic book zip file"""
extension: str = "cbz"
def dl_image_list(self, book: ImageList, location: str, update: Update) -> None:
image_count = len(book.images)
with zipfile.ZipFile(location, mode="w") as archive:
    for n, file in enumerate(book.images):
        content = self._download_file(file)
        archive.writestr(f"Image {n}.{file.extension}", content)
        if update:
            update(1/image_count)

grawlix/output/epub.py Normal file
@@ -0,0 +1,5 @@
from grawlix.book import Book, SingleFile
from .output_format import OutputFormat
class Epub(OutputFormat):
extension = "epub"

grawlix/output/output_format.py Normal file
@@ -0,0 +1,68 @@
from grawlix.book import Book, SingleFile, OnlineFile, ImageList
from grawlix.exceptions import UnsupportedOutputFormat
from grawlix.encryption import decrypt
import requests
from typing import Callable, Optional
Update = Optional[Callable[[float], None]]
class OutputFormat:
# Extension for output files
extension: str = ""
def __init__(self):
self._session = requests.Session()
def dl_single_file(self, book: SingleFile, location: str, update_func: Update) -> None:
"""
Download and write a `grawlix.SingleFile` to disk
:param book: Book to download
:param location: Path to where the file is written
:raises UnsupportedOutputFormat: If datatype is not supported by format
"""
if book.file.extension != self.extension:
raise UnsupportedOutputFormat
self._download_and_write_file(book.file, location)
def dl_image_list(self, book: ImageList, location: str, update_func: Update) -> None:
"""
Download and write a `grawlix.ImageList` to disk
:param book: Book to download
:param location: Path to where the file is written
:raises UnsupportedOutputFormat: If datatype is not supported by format
"""
raise UnsupportedOutputFormat
def _download_file(self, file: OnlineFile) -> bytes:
"""
Download `grawlix.OnlineFile`
:param file: File to download
:returns: Content of downloaded file
"""
response = self._session.get(
file.url,
headers = file.headers
)
content = response.content
if file.encryption is not None:
content = decrypt(content, file.encryption)
return content
def _download_and_write_file(self, file: OnlineFile, location: str) -> None:
"""
Download `grawlix.OnlineFile` and write its content to disk
:param file: File to download
:param location: Path to where the file is written
"""
content = self._download_file(file)
with open(location, "wb") as f:
f.write(content)

grawlix/sources/__init__.py Normal file
@@ -0,0 +1,38 @@
from grawlix.exceptions import NoSourceFound
from .source import Source
from .flipp import Flipp
from .mangaplus import MangaPlus
from .saxo import Saxo
from .webtoons import Webtoons
import re
def find_source(url: str) -> Source:
"""
Find source that matches url
:param url: Url of book to download
:returns: Source for downloading url
"""
for cls in get_source_classes():
    for pattern in cls.match:
        if re.match(pattern, url):
            return cls()
raise NoSourceFound
def get_source_classes() -> list[type[Source]]:
"""
Get all source types
:returns: A list of all available source types
"""
return [
Flipp,
MangaPlus,
Saxo,
Webtoons
]
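find_source() simply walks each source's match patterns; for example (the title id is made up):

from grawlix.sources import MangaPlus, find_source

source = find_source("https://mangaplus.shueisha.co.jp/titles/100020")
assert isinstance(source, MangaPlus)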

grawlix/sources/flipp.py Normal file
@@ -0,0 +1,172 @@
from .source import Source
from grawlix.book import Book, Metadata, ImageList, OnlineFile, Series, Result
from grawlix.exceptions import InvalidUrl, DataNotFound
from grawlix.utils import get_arg_from_url
import re
from urllib.parse import urlparse
from typing import Tuple, Optional
BASEURL = "https://reader.flipp.dk/html5/reader"
class Flipp(Source):
name: str = "Flipp"
match = [
r"https?://reader.flipp.dk/html5/reader/production/default.aspx\?pubname=&edid=([^/]+)",
r"https?://magasiner.flipp.dk/flipp/web-app/#/publications/.+"
]
_authentication_methods: list[str] = []
_login_cache: Optional[dict] = None
def download(self, url: str) -> Result:
if re.match(self.match[0], url):
eid = self._get_eid(url)
publication_id = self._get_series_id(eid)
return self._download_book(eid, publication_id)
elif re.match(self.match[1], url):
return self._download_series(url)
raise InvalidUrl
def download_book_from_id(self, book_id: Tuple[str, str]) -> Book:
series_id, issue_id = book_id
return self._download_book(issue_id, series_id)
def _download_series(self, url: str) -> Series:
"""
Download series with book ids from Flipp
:param url: Url of series
:returns: Series object
"""
series_id = url.split("/")[-1]
login_info = self._download_login_info()
series_metadata = self._extract_series_data(login_info, series_id)
issues = []
for issue in series_metadata["issues"]:
issue_id = issue["customIssueCode"]
issues.append((series_id, issue_id))
return Series(
title = series_metadata["name"],
book_ids = issues
)
def _download_login_info(self) -> dict:
"""
Download login info from Flipp
Will use cache if available
:returns: Login info
"""
if self._login_cache:
return self._login_cache
login_info = self._session.post(
"https://flippapi.egmontservice.com/api/signin",
headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:111.0) Gecko/20100101 Firefox/111.0"
},
json = {
"email": "",
"password": "",
"token": "",
"languageCulture": "da-DK",
"appId": "",
"appVersion": "",
"uuid": "",
"os": ""
}
).json()
self._login_cache = login_info
return login_info
def _extract_series_data(self, response: dict, series_id: str) -> dict:
"""
Extract metadata about series from login response
:param response: Login response from Flipp
:param series_id: Id of series
:returns: Metadata about series
"""
for publication in response["publications"]:
if publication["customPublicationCode"] == series_id:
return publication
raise DataNotFound
def _download_book(self, issue_id: str, series_id: str) -> Book:
"""
Download book from Flipp
:param issue_id: Issue identifier
:param series_id: Series identifier
:returns: Book metadata
"""
pages = self._get_pages(issue_id, series_id)
metadata = self._get_metadata(issue_id, series_id)
return Book(
data = ImageList(pages),
metadata = Metadata(
title = f"{metadata['series_name']} {metadata['issueName']}",
series = metadata["series_name"],
identifier = issue_id
),
)
def _get_metadata(self, issue_id: str, series_id: str) -> dict:
"""
Download and extract issue data
:param issue_id: Issue id
:param series_id: Series id
:returns: Issue metadata
"""
login_info = self._download_login_info()
series_metadata = self._extract_series_data(login_info, series_id)
for issue in series_metadata["issues"]:
if issue["customIssueCode"] == issue_id:
issue["series_name"] = series_metadata["name"]
return issue
raise DataNotFound
def _get_eid(self, url: str) -> str:
return get_arg_from_url(url, "edid")
def _get_series_id(self, issue_id: str) -> str:
"""
Download series id from issue id
:param issue_id: Issue id
:returns: Series id
"""
response = self._session.get(f"{BASEURL}/production/default.aspx?pubname=&edid={issue_id}")
# TODO Make faster
search = re.search(r'publicationguid = "([^"]+)', response.text)
if search is None:
raise DataNotFound
return search.group(1)
def _get_pages(self, issue_id: str, series_id: str) -> list[OnlineFile]:
"""
Download page metadata for book
:param issue_id: Issue id
:param series_id: Series id
:return: Page image links
"""
response = self._session.get(
f"{BASEURL}/get_page_groups_from_eid.aspx?pubid={series_id}&eid={issue_id}",
)
result = []
for page in response.json()["pageGroups"]:
# Find image id in low quality image url
low_quality_url = urlparse(page["pages"][0]["image"])
image_id = low_quality_url.path[1:-9]
high_quality_url = f"http://pages.cdn.pagesuite.com/{image_id}/highpage.jpg?method=true"
result.append(OnlineFile(high_quality_url, "jpg"))
return result

grawlix/sources/mangaplus.py Normal file
@@ -0,0 +1,102 @@
from .source import Source
from grawlix.encryption import XOrEncryption
from grawlix.book import Book, Metadata, ImageList, OnlineFile, Series, Result
from grawlix.exceptions import InvalidUrl
import re
import blackboxprotobuf
import json
import rich
class MangaPlus(Source):
name: str = "Manga Plus"
match = [
r"https?://mangaplus.shueisha.co.jp/viewer/\d+",
r"https?://mangaplus.shueisha.co.jp/titles/\d+"
]
_authentication_methods: list[str] = []
def download(self, url: str) -> Result:
if re.match(self.match[0], url):
issue_id = url.split('/')[-1]
return self._download_issue(issue_id)
if re.match(self.match[1], url):
series_id = url.split("/")[-1]
return self._download_series(series_id)
raise InvalidUrl
def download_book_from_id(self, book_id: str) -> Book:
return self._download_issue(book_id)
def _download_series(self, series_id: str) -> Series:
"""
Download series from Manga Plus
:param series_id: Identifier for series
:returns: Series data
"""
content = self._session.get(
f"https://jumpg-api.tokyo-cdn.com/api/title_detailV2",
params = {
"title_id": series_id,
"lang": "eng",
"os": "android",
"os_ver": "32",
"app_ver": "40",
"secret": "2afb69fbb05f57a1856cf75e1c4b6ee6"
},
).content
data, _ = blackboxprotobuf.protobuf_to_json(content)
parsed = json.loads(data)
title = parsed["1"]["8"]["1"]["2"]
issues = []
def add_issues(data: dict, main: str):
if main in data:
x = data[main]
if isinstance(x, list):
for i in x:
issues.append(i["2"])
else:
issues.append(x["2"])
for a in parsed["1"]["8"]["28"]:
add_issues(a, "2")
add_issues(a, "3")
add_issues(a, "4")
return Series(
title,
book_ids = issues
)
def _download_issue(self, issue_id: str) -> Book:
"""
Download issue from Manga Plus
:param issue_id: Identifier for issue
:returns: Issue metadata
"""
url = f"https://jumpg-webapi.tokyo-cdn.com/api/manga_viewer?chapter_id={issue_id}&split=yes&img_quality=super_high"
content = self._session.get(url).content
response, _ = blackboxprotobuf.protobuf_to_json(content)
images = []
title = "UNKNOWN"  # Fallback in case the response contains no title entry
parsed = json.loads(response)
for image in parsed["1"]["10"]["1"]:
if "1" in image:
images.append(
OnlineFile(
image["1"]["1"],
extension = "jpg",
encryption = XOrEncryption(bytes.fromhex(image["1"]["5"]))
)
)
elif "3" in image:
title = image["3"]["1"]["4"]
return Book(
data = ImageList(images),
metadata = Metadata(
title,
series = parsed["1"]["10"]["5"]
)
)

grawlix/sources/saxo.py Normal file
@@ -0,0 +1,119 @@
from grawlix.book import Book, Metadata, SingleFile, OnlineFile
from grawlix import AESEncryption
import re
from .source import Source
class Saxo(Source):
name: str = "Saxo"
match = [
r"https://(www.)?saxo.(com|dk)/[^/]+/.+\d+$"
]
_authentication_methods = [ "login" ]
user_id: str
def login(self, username: str, password: str, **kwargs) -> None:
response = self._session.post(
"https://auth-read.saxo.com/auth/token",
data = {
"username": username,
"password": password,
"grant_type": "password",
},
headers = {
"Content-Type": "application/x-www-form-urlencoded"
}
)
json = response.json()
bearer_token = json["access_token"]
self._session.headers = {
"Appauthorization": f"bearer {bearer_token}",
"App-Os": "android",
"App-Version": "6.2.4"
}
self.user_id = json["id"]
def download(self, url: str) -> Book:
isbn = self._extract_isbn_from_url(url)
book_id = self._get_book_id(isbn)
metadata = self._get_book_metadata(book_id)
ebook_id = metadata["id"] # Id of ebook file
return Book(
metadata = self._extract_metadata(metadata),
data = SingleFile(
OnlineFile(
url = self._get_book_file_link(ebook_id),
extension = "epub",
# Encryption keys extracted from app
encryption = AESEncryption(
key = b"CD3E9D141D8EFC0886912E7A8F3652C4",
iv = b"78CB354D377772F1"
)
)
)
)
def _get_book_id(self, isbn: str) -> str:
"""
Download internal book id of book from isbn
:param isbn: Isbn of book
:returns: Saxo internal book id
"""
response = self._session.get(
f"https://api-read.saxo.com/api/v2/search/user/{self.user_id}/premium/books/{isbn}"
)
return response.json()["items"][0]["bookId"]
def _get_book_metadata(self, book_id: str) -> dict:
"""
Download metadata of book
:param book_id: Id of book
:returns: Metadata of book
"""
response = self._session.get(
f"https://api-read.saxo.com/api/v2/book/{book_id}/user/{self.user_id}/details"
)
return response.json()["ebooks"][0]
def _get_book_file_link(self, ebook_id: str) -> str:
"""
Download link to epub file
:param ebook_id: Id of ebook file
:returns: Link to ebook file
"""
response = self._session.get(
f"https://api-read.saxo.com/api/v1/book/{ebook_id}/content/encryptedstream/"
)
return response.json()["link"]
@staticmethod
def _extract_metadata(metadata: dict) -> Metadata:
"""
Extract metadata from matadata response from Saxo
:param metadata: Metadata response from saxo
:returns: Metadata formatted as `grawlix.Metadata`
"""
return Metadata(metadata["title"])
@staticmethod
def _extract_isbn_from_url(url: str) -> str:
"""
Extracts isbn from url
:param url: Url of book
:returns: Isbn of book
"""
isbn_match = re.search(r"\d+$", url)
if isbn_match:
    return isbn_match.group()
raise NotImplementedError

grawlix/sources/source.py Normal file
@@ -0,0 +1,60 @@
from grawlix.book import Book, Series, Result
from typing import Generic, TypeVar, Tuple
import requests
T = TypeVar("T")
class Source(Generic[T]):
"""
General class for downloading books from various sources
"""
name: str = "UNKNOWN"
match: list[str] = []
_authentication_methods: list[str] = []
def __init__(self):
self._session = requests.Session()
@property
def requires_authentication(self) -> bool:
"""Does the source require authentication to download books"""
return len(self._authentication_methods) > 0
@property
def supports_login(self) -> bool:
"""Does the source support authentication with username and password"""
return "login" in self._authentication_methods
def login(self, username: str, password: str, **kwargs: str):
"""
Login to source
:param username: Username of user for source
:param password: Password of user for source
"""
raise NotImplementedError
def download(self, url: str) -> Result[T]:
"""
Download book metadata from source
:param url: Url of book to download
:returns: Book metadata
"""
raise NotImplementedError
def download_book_from_id(self, book_id: T) -> Book:
"""
Download book from id
:param book_id: Internal id of book
:returns: Downloaded book metadata
"""
raise NotImplementedError
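A new source only needs name, match, and the two download methods; a minimal sketch for a hypothetical site:

from grawlix.book import Book, Metadata, OnlineFile, SingleFile
from grawlix.sources.source import Source

class Example(Source[str]):
    """Minimal example source (hypothetical site)"""
    name: str = "Example"
    match: list[str] = [r"https?://example\.com/book/\d+"]
    _authentication_methods: list[str] = []

    def download(self, url: str) -> Book:
        return self.download_book_from_id(url.split("/")[-1])

    def download_book_from_id(self, book_id: str) -> Book:
        return Book(
            metadata = Metadata(title=f"Example {book_id}"),
            data = SingleFile(
                OnlineFile(f"https://example.com/files/{book_id}.epub", "epub")
            ),
        )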

grawlix/sources/webtoons.py Normal file
@@ -0,0 +1,92 @@
from .source import Source
from grawlix.book import Book, Metadata, ImageList, OnlineFile, Series, Result
from grawlix.utils import get_arg_from_url
from grawlix.exceptions import InvalidUrl
from bs4 import BeautifulSoup
import re
from urllib.parse import urlparse
USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:111.0) Gecko/20100101 Firefox/111.0"
MOBILE_USER_AGENT = "Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Mobile/15E148 Safari/604.1"
class Webtoons(Source[str]):
name: str = "Webtoons"
match = [
r"https://www.webtoons.com/../.+/.+/.+/viewer\?title_no=\d+&episode_no=\d+",
r"https://www.webtoons.com/../.+/.+/list\?title_no=\d+"
]
_authentication_methods: list[str] = []
def download(self, url: str) -> Result[str]:
if re.match(self.match[0], url):
return self._download_episode(url)
if re.match(self.match[1], url):
return self._download_series(url)
raise InvalidUrl
def download_book_from_id(self, book_id: str) -> Book:
return self._download_episode(book_id)
def _download_series(self, url: str) -> Series[str]:
"""
Download a series of webtoons
:param url: Url of series
:returns: Webtoons series data
"""
parsed_url = urlparse(url)
page = self._session.get(
f"https://m.webtoons.com{parsed_url.path}",
params = parsed_url.query,
headers = {
"User-Agent": MOBILE_USER_AGENT,
},
cookies = {
"needGDPR": "FALSE",
"needCCPA": "FALSE",
"needCOPPA": "FALSE"
}
).text
soup = BeautifulSoup(page, "lxml")
title = soup.find("meta", property="og:title").get("content")
episodes = []
for episode in soup.find_all("li", class_="_episodeItem"):
episode_link = episode.find("a").get("href")
episodes.append(episode_link)
return Series(
title,
book_ids = episodes
)
def _download_episode(self, url: str) -> Book:
"""
Download single webtoon episode
:param url: Url of episode
:returns: Episode
"""
page = self._session.get(url).text
soup = BeautifulSoup(page, "lxml")
title = soup.find("h1", class_="subj_episode").get("title")
series = soup.find("div", class_="subj_info").find("a").get("title")
images = []
for image in soup.find("div", class_="viewer_img _img_viewer_area").find_all("img"):
images.append(
OnlineFile(
url = image.get("data-url"),
extension = "png",
headers = { "Referer": "https://www.webtoons.com/" }
)
)
return Book(
data = ImageList(images),
metadata = Metadata(
title,
series = series
)
)

grawlix/utils/__init__.py Normal file
@@ -0,0 +1,11 @@
from grawlix.exceptions import DataNotFound
from urllib.parse import urlparse, parse_qs
def get_arg_from_url(url: str, key: str) -> str:
parsed_url = urlparse(url)
query = parse_qs(parsed_url.query)
try:
    return query[key][0]
except KeyError:
    raise DataNotFound
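For example:

from grawlix.utils import get_arg_from_url

assert get_arg_from_url("https://example.com/reader?edid=abc123", "edid") == "abc123"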

pyproject.toml Normal file
@@ -0,0 +1,49 @@
[project]
name = "grawlix"
authors = [
{ name = "Joakim Holm", email = "mail@joakimholm.xyz" }
]
description = "CLI tool for downloading ebooks"
readme = "README.md"
keywords = ["ebook", "cli", "downloader"]
classifiers = [
"Programming Language :: Python :: 3",
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
]
dependencies = [
"appdirs",
"beautifulsoup4",
"blackboxprotobuf",
"importlib-resources",
"lxml",
"pycryptodome",
"requests",
"rich",
"tomli"
]
dynamic = ["version"]
[project.urls]
"Homepage" = "https://github.com/jo1gi/ebook-dl"
"Bugtracker" = "https://github.com/jo1gi/ebook-dl/issues"
[project.scripts]
grawlix = "grawlix.__main__:main"
[build-system]
requires = ["setuptools", "setuptools-scm"]
build-backend = "setuptools.build_meta"
[tool.setuptools.dynamic]
version = {attr = "grawlix.__version__"}
[tool.setuptools.package-data]
grawlix = ["*.txt"]
[tool.mypy]
ignore_missing_imports = true
allow_untyped_globals = false
disallow_untyped_calls = true

setup.py Normal file
@@ -0,0 +1,3 @@
from setuptools import setup
setup()

shell.nix Normal file
@@ -0,0 +1,49 @@
with import <nixpkgs> {};
let
blackboxprotobuf = python3Packages.buildPythonPackage rec {
pname = "blackboxprotobuf";
version = "1.0.1";
src = python3Packages.fetchPypi {
inherit pname version;
sha256 = "sha256-IztxTmwkzp0cILhxRioiCvkXfk/sAcG3l6xauGoeHOo=";
};
propagatedBuildInputs = with python3Packages; [
protobuf
];
patchPhase = ''
sed -i 's/protobuf==3.10.0/protobuf/' requirements.txt
'';
doCheck = false;
};
in
mkShell {
buildInputs = [
(python3.withPackages(ps: with ps; [
appdirs
beautifulsoup4
blackboxprotobuf
importlib-resources
lxml
pycryptodome
requests
rich
tomli
# Test
pytest
mypy
types-requests
types-setuptools
# Build
build
setuptools
twine
]))
];
}