mirror of
https://github.com/sotam0316/docker-py-revanced.git
synced 2026-04-25 03:48:37 +09:00
Merge pull request #284 from nikhilbadyal/feature/280-use-beautifulsoup4-for-scrapping
🎨 Use BS4 for scrapping
This commit is contained in:
@@ -4,5 +4,4 @@ lastversion==3.0.1
|
||||
loguru==0.7.0
|
||||
pre-commit==3.3.3
|
||||
requests==2.31.0
|
||||
selectolax==0.3.16
|
||||
tqdm==4.66.1
|
||||
|
||||
+68
-59
@@ -1,62 +1,84 @@
|
||||
"""Downloader Class."""
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from loguru import logger
|
||||
from selectolax.lexbor import LexborHTMLParser
|
||||
|
||||
from scripts.status_check import headers
|
||||
from src.downloader.download import Downloader
|
||||
from src.exceptions import AppNotFound
|
||||
from src.utils import apkmirror_status_check, bs4_parser
|
||||
|
||||
|
||||
class ApkMirror(Downloader):
|
||||
"""Files downloader."""
|
||||
|
||||
def extract_download_link(self, page: str, app: str) -> None:
|
||||
def _extract_force_download_link(self, link: str, app: str) -> None:
|
||||
"""Extract force download link."""
|
||||
r = requests.get(link, headers=headers)
|
||||
if r.status_code != 200:
|
||||
raise AppNotFound(f"Unable to connect with {link} on ApkMirror.")
|
||||
soup = BeautifulSoup(r.text, bs4_parser)
|
||||
notes_divs = soup.find(class_="tab-pane")
|
||||
possible_links = notes_divs.find_all("a")
|
||||
for possible_link in possible_links:
|
||||
if possible_link.get("href") and "download.php?id=" in possible_link.get(
|
||||
"href"
|
||||
):
|
||||
return self._download(
|
||||
self.config.apk_mirror + possible_link["href"], f"{app}.apk"
|
||||
)
|
||||
raise AppNotFound(f"Unable to download apk from {link}")
|
||||
|
||||
def extract_download_link(self, main_page: str, app: str) -> None:
|
||||
"""Function to extract the download link from apkmirror html page.
|
||||
|
||||
:param page: Url of the page
|
||||
:param main_page: Url of the page
|
||||
:param app: Name of the app
|
||||
"""
|
||||
logger.debug(f"Extracting download link from\n{page}")
|
||||
parser = LexborHTMLParser(self.config.session.get(page).text)
|
||||
|
||||
resp = self.config.session.get(
|
||||
self.config.apk_mirror + parser.css_first("a.accent_bg").attributes["href"]
|
||||
logger.debug(f"Extracting download link from\n{main_page}")
|
||||
r = requests.get(main_page, headers=headers)
|
||||
if r.status_code != 200:
|
||||
raise AppNotFound(f"Unable to connect with {main_page} on ApkMirror.")
|
||||
soup = BeautifulSoup(r.text, bs4_parser)
|
||||
download_button = soup.find(class_="center")
|
||||
download_links = download_button.find_all("a")
|
||||
final_download_link = None
|
||||
for download_link in download_links:
|
||||
if download_link.get("href"):
|
||||
if "download/?key=" in download_link.get("href"):
|
||||
final_download_link = download_link["href"]
|
||||
break
|
||||
if not final_download_link:
|
||||
raise AppNotFound(f"Unable to download apk from {main_page}")
|
||||
self._extract_force_download_link(
|
||||
self.config.apk_mirror + final_download_link, app
|
||||
)
|
||||
parser = LexborHTMLParser(resp.text)
|
||||
|
||||
href = parser.css_first(
|
||||
"p.notes:nth-child(3) > span:nth-child(1) > a:nth-child(1)"
|
||||
).attributes["href"]
|
||||
self._download(self.config.apk_mirror + href, f"{app}.apk")
|
||||
|
||||
def get_download_page(self, parser: LexborHTMLParser, main_page: str) -> str:
|
||||
def get_download_page(self, main_page: str) -> str:
|
||||
"""Function to get the download page in apk_mirror.
|
||||
|
||||
:param parser: Parser
|
||||
:param main_page: Main Download Page in APK mirror(Index)
|
||||
:return:
|
||||
"""
|
||||
logger.debug(f"Getting download page from {main_page}")
|
||||
apm = parser.css(".apkm-badge")
|
||||
sub_url = ""
|
||||
for is_apm in apm:
|
||||
parent_text = is_apm.parent.parent.text()
|
||||
if "APK" in is_apm.text() and (
|
||||
"arm64-v8a" in parent_text
|
||||
or "universal" in parent_text
|
||||
or "noarch" in parent_text
|
||||
):
|
||||
parser = is_apm.parent
|
||||
sub_url = parser.css_first(".accent_color").attributes["href"]
|
||||
r = requests.get(main_page, headers=headers)
|
||||
if r.status_code != 200:
|
||||
raise AppNotFound(f"Unable to connect with {main_page} on ApkMirror.")
|
||||
soup = BeautifulSoup(r.text, bs4_parser)
|
||||
list_widget = soup.find(class_="listWidget")
|
||||
table_rows = list_widget.find_all(class_="table-row")
|
||||
sub_url = None
|
||||
for row in table_rows:
|
||||
if row.find(class_="accent_color"):
|
||||
apk_type = row.find(class_="apkm-badge").get_text()
|
||||
if apk_type == "APK":
|
||||
sub_url = row.find(class_="accent_color")["href"]
|
||||
break
|
||||
if sub_url == "":
|
||||
logger.exception(
|
||||
f"Unable to find any apk on apkmirror_specific_version on {main_page}"
|
||||
)
|
||||
raise AppNotFound("Unable to find apk on apkmirror site.")
|
||||
return self.config.apk_mirror + sub_url
|
||||
if not sub_url:
|
||||
raise AppNotFound("Unable to download apk from APKMirror.")
|
||||
return f"{self.config.apk_mirror}{sub_url}"
|
||||
|
||||
def specific_version(self, app: str, version: str) -> None:
|
||||
"""Function to download the specified version of app from apkmirror.
|
||||
@@ -67,10 +89,7 @@ class ApkMirror(Downloader):
|
||||
"""
|
||||
version = version.replace(".", "-")
|
||||
main_page = f"{self.config.apk_mirror_version_urls.get(app)}-{version}-release/"
|
||||
parser = LexborHTMLParser(
|
||||
self.config.session.get(main_page, allow_redirects=True).text
|
||||
)
|
||||
download_page = self.get_download_page(parser, main_page)
|
||||
download_page = self.get_download_page(main_page)
|
||||
self.extract_download_link(download_page, app)
|
||||
|
||||
def latest_version(self, app: str, **kwargs: Any) -> None:
|
||||
@@ -80,24 +99,14 @@ class ApkMirror(Downloader):
|
||||
:param app: Name of the application
|
||||
:return: Version of downloaded apk
|
||||
"""
|
||||
logger.debug(f"Trying to download {app}'s latest version from apkmirror")
|
||||
page = self.config.apk_mirror_urls.get(app)
|
||||
if not page:
|
||||
logger.debug("Invalid app")
|
||||
raise AppNotFound("Invalid app")
|
||||
parser = LexborHTMLParser(self.config.session.get(page).text)
|
||||
try:
|
||||
main_page = parser.css_first(".appRowVariantTag>.accent_color").attributes[
|
||||
"href"
|
||||
]
|
||||
except AttributeError:
|
||||
# Handles a case when variants are not available
|
||||
main_page = parser.css_first(".downloadLink").attributes["href"]
|
||||
match = re.search(r"\d", main_page)
|
||||
if not match:
|
||||
logger.error("Cannot find app main page")
|
||||
raise AppNotFound()
|
||||
main_page = f"{self.config.apk_mirror}{main_page}"
|
||||
parser = LexborHTMLParser(self.config.session.get(main_page).text)
|
||||
download_page = self.get_download_page(parser, main_page)
|
||||
self.extract_download_link(download_page, app)
|
||||
from src.patches import Patches
|
||||
|
||||
package_name = Patches.get_package_name(app)
|
||||
response = apkmirror_status_check(package_name)
|
||||
if response["data"][0]["exists"]:
|
||||
version = response["data"][0]["release"]["version"]
|
||||
logger.debug(
|
||||
f"Trying to download {app}'s latest version({version}) from apkmirror"
|
||||
)
|
||||
return self.specific_version(app, version)
|
||||
raise AppNotFound("App not found on apkmirror.")
|
||||
|
||||
@@ -1,10 +1,13 @@
|
||||
"""APK SOS Downloader Class."""
|
||||
from typing import Any
|
||||
|
||||
from loguru import logger
|
||||
from selectolax.lexbor import LexborHTMLParser
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from scripts.status_check import headers
|
||||
from src.downloader.download import Downloader
|
||||
from src.exceptions import AppNotFound
|
||||
from src.utils import bs4_parser
|
||||
|
||||
|
||||
class ApkSos(Downloader):
|
||||
@@ -16,13 +19,14 @@ class ApkSos(Downloader):
|
||||
:param page: Url of the page
|
||||
:param app: Name of the app
|
||||
"""
|
||||
parser = LexborHTMLParser(self.config.session.get(page).text)
|
||||
download_url = parser.css_first(
|
||||
r"body > div > div > div > div > div.col-sm-12.col-md-8 > div.card.fluid.\.idma > "
|
||||
"div.section.row > div.col-sm-12.col-md-8.text-center > p > a"
|
||||
).attributes["href"]
|
||||
self._download(download_url, f"{app}.apk")
|
||||
logger.debug(f"Downloaded {app} apk from apk_combo_downloader in rt")
|
||||
r = requests.get(page, headers=headers, allow_redirects=True)
|
||||
soup = BeautifulSoup(r.text, bs4_parser)
|
||||
download_button = soup.find(class_="col-sm-12 col-md-8 text-center")
|
||||
possible_links = download_button.find_all("a")
|
||||
for possible_link in possible_links:
|
||||
if possible_link.get("href"):
|
||||
return self._download(possible_link["href"], f"{app}.apk")
|
||||
raise AppNotFound("Unable to download apk from apk_combo")
|
||||
|
||||
def latest_version(self, app: str, **kwargs: Any) -> None:
|
||||
"""Function to download whatever the latest version of app from
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
"""Upto Down Downloader."""
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from loguru import logger
|
||||
from selectolax.lexbor import LexborHTMLParser
|
||||
|
||||
from scripts.status_check import headers
|
||||
from src.downloader.download import Downloader
|
||||
from src.exceptions import AppNotFound
|
||||
from src.utils import bs4_parser
|
||||
@@ -14,9 +15,12 @@ class UptoDown(Downloader):
|
||||
"""Files downloader."""
|
||||
|
||||
def extract_download_link(self, page: str, app: str) -> None:
|
||||
parser = LexborHTMLParser(self.config.session.get(page).text)
|
||||
main_page = parser.css_first("#detail-download-button")
|
||||
download_url = main_page.attributes["data-url"]
|
||||
r = requests.get(page, headers=headers, allow_redirects=True)
|
||||
soup = BeautifulSoup(r.text, bs4_parser)
|
||||
soup = soup.find(id="detail-download-button")
|
||||
download_url = soup.get("data-url")
|
||||
if not download_url:
|
||||
raise AppNotFound("Unable to download from uptodown.")
|
||||
self._download(download_url, f"{app}.apk")
|
||||
logger.debug(f"Downloaded {app} apk from upto_down_downloader in rt")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user