mirror of
https://github.com/sotam0316/docker-py-revanced.git
synced 2026-04-25 03:48:37 +09:00
✨ Scraper improvements (#488)
This commit is contained in:
@@ -235,8 +235,8 @@ You can use any of the following methods to build.
|
||||
1. Link Format - https://apksos.com/download-app/<package-name>
|
||||
2. Example Link - https://apksos.com/download-app/com.expensemanager
|
||||
4. APKPURE - Supports downloading only latest version
|
||||
1. Link Format - https://d.apkpure.com/b/APK/<package-name>?version=latest
|
||||
2. Example Link - https://d.apkpure.com/b/APK/com.google.android.youtube?version=latest
|
||||
1. Link Format - https://apkpure.net/-/<package-name>
|
||||
2. Example Link - https://apkpure.net/-/com.google.android.youtube
|
||||
5. APKMonk - Supports downloading any available version
|
||||
1. Link Format - https://www.apkmonk.com/app/<package-name>/
|
||||
2. Example Link - https://www.apkmonk.com/app/<package-name>/
|
||||
|
||||
@@ -28,7 +28,7 @@ class ApkMonk(Downloader):
|
||||
handle_request_response(r, page)
|
||||
soup = BeautifulSoup(r.text, bs4_parser)
|
||||
download_scripts = soup.find_all("script", type="text/javascript")
|
||||
key_value_pattern = r'\{"pkg":"([^"]+)","key":"([^"]+)"\}'
|
||||
key_value_pattern = r"pkg=([^&]+)&key=([^']+)"
|
||||
url = None
|
||||
for script in download_scripts:
|
||||
if match := re.search(key_value_pattern, script.text):
|
||||
|
||||
+122
-6
@@ -2,19 +2,135 @@
|
||||
|
||||
from typing import Any, Self
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from loguru import logger
|
||||
|
||||
from src.app import APP
|
||||
from src.downloader.download import Downloader
|
||||
from src.exceptions import APKPureAPKDownloadError
|
||||
from src.utils import bs4_parser, handle_request_response, request_header, request_timeout, slugify
|
||||
|
||||
|
||||
class ApkPure(Downloader):
|
||||
"""Files downloader."""
|
||||
|
||||
def latest_version(self: Self, app: APP, **kwargs: Any) -> tuple[str, str]:
|
||||
"""Function to download whatever the latest version of app from apkmirror.
|
||||
default_archs_priority: tuple[str, ...] = ("arm64-v8a", "armeabi-v7a", "x86_64", "x86")
|
||||
|
||||
@staticmethod
|
||||
def _select_preferred_dl(app: str, apk_dls: list[str], xapk_dls: list[str]) -> tuple[str | None, str | None]:
|
||||
file_name = None
|
||||
app_dl = None
|
||||
if apk_dls:
|
||||
file_name = f"{app}.apk"
|
||||
app_dl = apk_dls[0]
|
||||
elif xapk_dls:
|
||||
file_name = f"{app}.zip"
|
||||
app_dl = xapk_dls[0]
|
||||
return file_name, app_dl
|
||||
|
||||
def _sort_by_priority(self: Self, arch_list: list[str] | tuple[str]) -> list[str]:
|
||||
"""Specifically used to sort the arch list based on order of elements of default archs priority list."""
|
||||
return [darch for darch in self.default_archs_priority if darch in arch_list]
|
||||
|
||||
def _compare_dls(self: Self, dl1: str, dl2: str) -> int:
|
||||
"""Compare two dls of same type (apk or xapk) to prioritise the archs on lower indices."""
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
|
||||
apk_type1 = parse_qs(urlparse(dl1).query).get("nc")
|
||||
apk_type2 = parse_qs(urlparse(dl2).query).get("nc")
|
||||
if apk_type1 and apk_type2:
|
||||
l1 = len(apk_type1)
|
||||
l2 = len(apk_type2)
|
||||
# Indicates support for multiple archs, hence longer length
|
||||
if l1 > l2:
|
||||
return -1
|
||||
if l1 < l2:
|
||||
return 1
|
||||
# Arrange based on priority list
|
||||
priority = self.global_archs_priority or self.default_archs_priority
|
||||
for arch in priority:
|
||||
if arch in apk_type1 and arch not in apk_type2:
|
||||
return -1
|
||||
if arch not in apk_type1 and arch in apk_type2:
|
||||
return 1
|
||||
elif not apk_type1 and apk_type2:
|
||||
return 1
|
||||
elif apk_type1 and not apk_type2:
|
||||
return -1
|
||||
return 0
|
||||
|
||||
def extract_download_link(self: Self, page: str, app: str) -> tuple[str, str]:
|
||||
"""Function to extract the download link from apkpure download page.
|
||||
|
||||
:param page: Url of the page
|
||||
:param app: Name of the app
|
||||
:return: Tuple of filename and app direct download link
|
||||
"""
|
||||
from functools import cmp_to_key
|
||||
|
||||
logger.debug(f"Extracting download link from\n{page}")
|
||||
r = requests.get(page, headers=request_header, timeout=request_timeout)
|
||||
handle_request_response(r, page)
|
||||
soup = BeautifulSoup(r.text, bs4_parser)
|
||||
apks = soup.select("#version-list a.download-btn")
|
||||
_apk_dls: list[str] = []
|
||||
_xapk_dls: list[str] = []
|
||||
for apk in apks:
|
||||
if _apk_dl := apk.get("href"):
|
||||
if "/b/XAPK/" in _apk_dl:
|
||||
_xapk_dls.append(_apk_dl) # type: ignore # noqa: PGH003
|
||||
else:
|
||||
_apk_dls.append(_apk_dl) # type: ignore # noqa: PGH003
|
||||
_apk_dls.sort(key=cmp_to_key(self._compare_dls))
|
||||
_xapk_dls.sort(key=cmp_to_key(self._compare_dls))
|
||||
file_name, app_dl = self._select_preferred_dl(app, _apk_dls, _xapk_dls)
|
||||
if not file_name or not app_dl:
|
||||
msg = f"Unable to extract link from {app} version list"
|
||||
raise APKPureAPKDownloadError(msg, url=page)
|
||||
if app_version := soup.select_one("span.info-sdk > span"):
|
||||
self.app_version = slugify(app_version.get_text(strip=True))
|
||||
logger.info(f"Will be downloading {app}'s version {self.app_version}...")
|
||||
return file_name, app_dl
|
||||
|
||||
def specific_version(self: Self, app: APP, version: str) -> tuple[str, str]:
|
||||
"""Function to download the specified version of app from apkpure.
|
||||
|
||||
:param app: Name of the application
|
||||
:return: Version of downloaded apk
|
||||
:param version: Version of the application to download
|
||||
:return: Tuple of filename and app direct download link
|
||||
"""
|
||||
file_name = f"{app.app_name}.apk"
|
||||
self._download(app.download_source, file_name)
|
||||
return file_name, app.download_source
|
||||
self.global_archs_priority = tuple(self._sort_by_priority(app.archs_to_build))
|
||||
version_page = app.download_source + "/versions"
|
||||
r = requests.get(version_page, headers=request_header, timeout=request_timeout)
|
||||
handle_request_response(r, version_page)
|
||||
soup = BeautifulSoup(r.text, bs4_parser)
|
||||
version_box_list = soup.select("ul.ver-wrap > *")
|
||||
for box in version_box_list:
|
||||
if (
|
||||
(_data := box.select_one("a.ver_download_link"))
|
||||
and (found_version := _data.get("data-dt-version"))
|
||||
and found_version == version
|
||||
):
|
||||
download_page = _data.get("href")
|
||||
file_name, download_source = self.extract_download_link(download_page, app.app_name) # type: ignore # noqa: PGH003
|
||||
app.app_version = self.app_version
|
||||
logger.info(f"Guessed {app.app_version} for {app.app_name}")
|
||||
self._download(download_source, file_name)
|
||||
return file_name, download_source
|
||||
msg = f"Unable to find specific version '{version}' for {app} from version list"
|
||||
raise APKPureAPKDownloadError(msg, url=version_page)
|
||||
|
||||
def latest_version(self: Self, app: APP, **kwargs: Any) -> tuple[str, str]:
|
||||
"""Function to download whatever the latest version of app from apkpure.
|
||||
|
||||
:param app: Name of the application
|
||||
:return: Tuple of filename and app direct download link
|
||||
"""
|
||||
self.global_archs_priority = tuple(self._sort_by_priority(app.archs_to_build))
|
||||
download_page = app.download_source + "/download"
|
||||
file_name, download_source = self.extract_download_link(download_page, app.app_name)
|
||||
app.app_version = self.app_version
|
||||
logger.info(f"Guessed {app.app_version} for {app.app_name}")
|
||||
self._download(download_source, file_name)
|
||||
return file_name, download_source
|
||||
|
||||
@@ -26,8 +26,10 @@ class ApkSos(Downloader):
|
||||
download_button = soup.find(class_="col-sm-12 col-md-8 text-center")
|
||||
possible_links = download_button.find_all("a") # type: ignore[union-attr]
|
||||
for possible_link in possible_links:
|
||||
if possible_link.get("href"):
|
||||
if possible_link.get("href") and (_title := possible_link.get("title")):
|
||||
file_name = f"{app}.apk"
|
||||
if _title.endswith("Bundle"):
|
||||
file_name = f"{app}.zip"
|
||||
self._download(possible_link["href"], file_name)
|
||||
return file_name, possible_link["href"]
|
||||
msg = f"Unable to download {app}"
|
||||
|
||||
@@ -11,7 +11,6 @@ from src.downloader.google_drive import GoogleDrive
|
||||
from src.downloader.sources import (
|
||||
APK_MIRROR_BASE_URL,
|
||||
APK_MONK_BASE_URL,
|
||||
APK_PURE_BASE_APK_URL,
|
||||
APK_PURE_BASE_URL,
|
||||
APKS_SOS_BASE_URL,
|
||||
DRIVE_DOWNLOAD_BASE_URL,
|
||||
@@ -36,7 +35,7 @@ class DownloaderFactory(object):
|
||||
"""
|
||||
if apk_source.startswith(GITHUB_BASE_URL):
|
||||
return Github(config)
|
||||
if apk_source.startswith((APK_PURE_BASE_URL, APK_PURE_BASE_APK_URL)):
|
||||
if apk_source.startswith(APK_PURE_BASE_URL):
|
||||
return ApkPure(config)
|
||||
if apk_source.startswith(APKS_SOS_BASE_URL):
|
||||
return ApkSos(config)
|
||||
|
||||
@@ -5,10 +5,9 @@ APK_MIRROR_BASE_APK_URL = f"{APK_MIRROR_BASE_URL}/apk"
|
||||
APK_MIRROR_PACKAGE_URL = f"{APK_MIRROR_BASE_URL}/?s=" + "{}"
|
||||
APK_MIRROR_APK_CHECK = f"{APK_MIRROR_BASE_URL}/wp-json/apkm/v1/app_exists/"
|
||||
UPTODOWN_SUFFIX = "en.uptodown.com/android"
|
||||
UPTODOWN_BASE_URL = "https://{}.en.uptodown.com/android"
|
||||
UPTODOWN_BASE_URL = "https://{}." + UPTODOWN_SUFFIX
|
||||
APK_PURE_BASE_URL = "https://apkpure.net"
|
||||
APK_PURE_BASE_APK_URL = "https://d.apkpure.net/b/APK"
|
||||
APK_PURE_URL = APK_PURE_BASE_APK_URL + "/{}?version=latest"
|
||||
APK_PURE_URL = APK_PURE_BASE_URL + "/-/{}"
|
||||
APK_PURE_ICON_URL = APK_PURE_BASE_URL + "/search?q={}"
|
||||
APKS_SOS_BASE_URL = "https://apksos.com/download-app"
|
||||
APK_SOS_URL = APKS_SOS_BASE_URL + "/{}"
|
||||
|
||||
Reference in New Issue
Block a user