mirror of
https://github.com/sotam0316/docker-py-revanced.git
synced 2026-04-25 03:48:37 +09:00
🎨 Use BS4 for scraping APKMirror
This commit is contained in:
+68
-59
@@ -1,62 +1,84 @@
|
|||||||
"""Downloader Class."""
|
"""Downloader Class."""
|
||||||
import re
|
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from selectolax.lexbor import LexborHTMLParser
|
|
||||||
|
|
||||||
|
from scripts.status_check import headers
|
||||||
from src.downloader.download import Downloader
|
from src.downloader.download import Downloader
|
||||||
from src.exceptions import AppNotFound
|
from src.exceptions import AppNotFound
|
||||||
|
from src.utils import apkmirror_status_check, bs4_parser
|
||||||
|
|
||||||
|
|
||||||
class ApkMirror(Downloader):
    """Files downloader."""

    def _extract_force_download_link(self, link: str, app: str) -> None:
        """Extract the direct (force) download link and download the apk.

        :param link: Url of the intermediate APKMirror download page
        :param app: Name of the app (used for the output file name)
        :raises AppNotFound: if the page is unreachable or no
            ``download.php?id=`` link is present
        """
        r = requests.get(link, headers=headers)
        if r.status_code != 200:
            raise AppNotFound(f"Unable to connect with {link} on ApkMirror.")
        soup = BeautifulSoup(r.text, bs4_parser)
        notes_divs = soup.find(class_="tab-pane")
        if notes_divs is None:
            # Layout changed or notes section missing — fail with the same
            # exception type callers already handle instead of AttributeError.
            raise AppNotFound(f"Unable to download apk from {link}")
        for possible_link in notes_divs.find_all("a"):
            # Hoist the attribute lookup instead of calling .get("href") twice.
            href = possible_link.get("href")
            if href and "download.php?id=" in href:
                return self._download(
                    self.config.apk_mirror + href, f"{app}.apk"
                )
        raise AppNotFound(f"Unable to download apk from {link}")
||||||
|
def extract_download_link(self, main_page: str, app: str) -> None:
|
||||||
"""Function to extract the download link from apkmirror html page.
|
"""Function to extract the download link from apkmirror html page.
|
||||||
|
|
||||||
:param page: Url of the page
|
:param main_page: Url of the page
|
||||||
:param app: Name of the app
|
:param app: Name of the app
|
||||||
"""
|
"""
|
||||||
logger.debug(f"Extracting download link from\n{page}")
|
logger.debug(f"Extracting download link from\n{main_page}")
|
||||||
parser = LexborHTMLParser(self.config.session.get(page).text)
|
r = requests.get(main_page, headers=headers)
|
||||||
|
if r.status_code != 200:
|
||||||
resp = self.config.session.get(
|
raise AppNotFound(f"Unable to connect with {main_page} on ApkMirror.")
|
||||||
self.config.apk_mirror + parser.css_first("a.accent_bg").attributes["href"]
|
soup = BeautifulSoup(r.text, bs4_parser)
|
||||||
|
download_button = soup.find(class_="center")
|
||||||
|
download_links = download_button.find_all("a")
|
||||||
|
final_download_link = None
|
||||||
|
for download_link in download_links:
|
||||||
|
if download_link.get("href"):
|
||||||
|
if "download/?key=" in download_link.get("href"):
|
||||||
|
final_download_link = download_link["href"]
|
||||||
|
break
|
||||||
|
if not final_download_link:
|
||||||
|
raise AppNotFound(f"Unable to download apk from {main_page}")
|
||||||
|
self._extract_force_download_link(
|
||||||
|
self.config.apk_mirror + final_download_link, app
|
||||||
)
|
)
|
||||||
parser = LexborHTMLParser(resp.text)
|
|
||||||
|
|
||||||
href = parser.css_first(
|
def get_download_page(self, main_page: str) -> str:
|
||||||
"p.notes:nth-child(3) > span:nth-child(1) > a:nth-child(1)"
|
|
||||||
).attributes["href"]
|
|
||||||
self._download(self.config.apk_mirror + href, f"{app}.apk")
|
|
||||||
|
|
||||||
def get_download_page(self, parser: LexborHTMLParser, main_page: str) -> str:
|
|
||||||
"""Function to get the download page in apk_mirror.
|
"""Function to get the download page in apk_mirror.
|
||||||
|
|
||||||
:param parser: Parser
|
:param parser: Parser
|
||||||
:param main_page: Main Download Page in APK mirror(Index)
|
:param main_page: Main Download Page in APK mirror(Index)
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
logger.debug(f"Getting download page from {main_page}")
|
r = requests.get(main_page, headers=headers)
|
||||||
apm = parser.css(".apkm-badge")
|
if r.status_code != 200:
|
||||||
sub_url = ""
|
raise AppNotFound(f"Unable to connect with {main_page} on ApkMirror.")
|
||||||
for is_apm in apm:
|
soup = BeautifulSoup(r.text, bs4_parser)
|
||||||
parent_text = is_apm.parent.parent.text()
|
list_widget = soup.find(class_="listWidget")
|
||||||
if "APK" in is_apm.text() and (
|
table_rows = list_widget.find_all(class_="table-row")
|
||||||
"arm64-v8a" in parent_text
|
sub_url = None
|
||||||
or "universal" in parent_text
|
for row in table_rows:
|
||||||
or "noarch" in parent_text
|
if row.find(class_="accent_color"):
|
||||||
):
|
apk_type = row.find(class_="apkm-badge").get_text()
|
||||||
parser = is_apm.parent
|
if apk_type == "APK":
|
||||||
sub_url = parser.css_first(".accent_color").attributes["href"]
|
sub_url = row.find(class_="accent_color")["href"]
|
||||||
break
|
break
|
||||||
if sub_url == "":
|
if not sub_url:
|
||||||
logger.exception(
|
raise AppNotFound("Unable to download apk from APKMirror.")
|
||||||
f"Unable to find any apk on apkmirror_specific_version on {main_page}"
|
return f"{self.config.apk_mirror}{sub_url}"
|
||||||
)
|
|
||||||
raise AppNotFound("Unable to find apk on apkmirror site.")
|
|
||||||
return self.config.apk_mirror + sub_url
|
|
||||||
|
|
||||||
def specific_version(self, app: str, version: str) -> None:
|
def specific_version(self, app: str, version: str) -> None:
|
||||||
"""Function to download the specified version of app from apkmirror.
|
"""Function to download the specified version of app from apkmirror.
|
||||||
@@ -67,10 +89,7 @@ class ApkMirror(Downloader):
|
|||||||
"""
|
"""
|
||||||
version = version.replace(".", "-")
|
version = version.replace(".", "-")
|
||||||
main_page = f"{self.config.apk_mirror_version_urls.get(app)}-{version}-release/"
|
main_page = f"{self.config.apk_mirror_version_urls.get(app)}-{version}-release/"
|
||||||
parser = LexborHTMLParser(
|
download_page = self.get_download_page(main_page)
|
||||||
self.config.session.get(main_page, allow_redirects=True).text
|
|
||||||
)
|
|
||||||
download_page = self.get_download_page(parser, main_page)
|
|
||||||
self.extract_download_link(download_page, app)
|
self.extract_download_link(download_page, app)
|
||||||
|
|
||||||
def latest_version(self, app: str, **kwargs: Any) -> None:
|
def latest_version(self, app: str, **kwargs: Any) -> None:
|
||||||
@@ -80,24 +99,14 @@ class ApkMirror(Downloader):
|
|||||||
:param app: Name of the application
|
:param app: Name of the application
|
||||||
:return: Version of downloaded apk
|
:return: Version of downloaded apk
|
||||||
"""
|
"""
|
||||||
logger.debug(f"Trying to download {app}'s latest version from apkmirror")
|
from src.patches import Patches
|
||||||
page = self.config.apk_mirror_urls.get(app)
|
|
||||||
if not page:
|
package_name = Patches.get_package_name(app)
|
||||||
logger.debug("Invalid app")
|
response = apkmirror_status_check(package_name)
|
||||||
raise AppNotFound("Invalid app")
|
if response["data"][0]["exists"]:
|
||||||
parser = LexborHTMLParser(self.config.session.get(page).text)
|
version = response["data"][0]["release"]["version"]
|
||||||
try:
|
logger.debug(
|
||||||
main_page = parser.css_first(".appRowVariantTag>.accent_color").attributes[
|
f"Trying to download {app}'s latest version({version}) from apkmirror"
|
||||||
"href"
|
)
|
||||||
]
|
return self.specific_version(app, version)
|
||||||
except AttributeError:
|
raise AppNotFound("App not found on apkmirror.")
|
||||||
# Handles a case when variants are not available
|
|
||||||
main_page = parser.css_first(".downloadLink").attributes["href"]
|
|
||||||
match = re.search(r"\d", main_page)
|
|
||||||
if not match:
|
|
||||||
logger.error("Cannot find app main page")
|
|
||||||
raise AppNotFound()
|
|
||||||
main_page = f"{self.config.apk_mirror}{main_page}"
|
|
||||||
parser = LexborHTMLParser(self.config.session.get(main_page).text)
|
|
||||||
download_page = self.get_download_page(parser, main_page)
|
|
||||||
self.extract_download_link(download_page, app)
|
|
||||||
|
|||||||
Reference in New Issue
Block a user