mirror of
https://github.com/jo1gi/grawlix.git
synced 2026-06-05 05:54:56 -06:00
Update Internet Archive metadata retrieval
Should fix issue 17
This commit is contained in:
parent
f154be5c25
commit
465abbecad
@@ -1,10 +1,13 @@
|
|||||||
from grawlix.book import Book, SingleFile, Metadata, OfflineFile
|
from grawlix.book import Book, SingleFile, Metadata, OfflineFile
|
||||||
|
from grawlix.exceptions import DataNotFound
|
||||||
|
from grawlix import logging
|
||||||
from .source import Source
|
from .source import Source
|
||||||
|
|
||||||
import random
|
import random
|
||||||
import string
|
import string
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import json
|
||||||
|
|
||||||
class InternetArchive(Source):
|
class InternetArchive(Source):
|
||||||
name: str = "Internet Archive"
|
name: str = "Internet Archive"
|
||||||
@@ -104,8 +107,10 @@ class InternetArchive(Source):
|
|||||||
f"https://archive.org/details/{book_id}"
|
f"https://archive.org/details/{book_id}"
|
||||||
)
|
)
|
||||||
soup = BeautifulSoup(page_response.text, "lxml")
|
soup = BeautifulSoup(page_response.text, "lxml")
|
||||||
metadata_url = soup.find("ia-book-theater").get("bookmanifesturl")
|
reader_data = json.loads(soup.find(class_="js-bookreader").get("value"))
|
||||||
|
metadata_url = f"https:{reader_data['url']}"
|
||||||
|
logging.debug(f"{metadata_url=}")
|
||||||
metadata_response = await self._client.get(
|
metadata_response = await self._client.get(
|
||||||
f"https:{metadata_url}"
|
metadata_url
|
||||||
)
|
)
|
||||||
return metadata_response.json()["data"]["metadata"]
|
return metadata_response.json()["data"]["metadata"]
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user