mirror of
https://github.com/sotam0316/docker-py-revanced.git
synced 2026-04-25 03:48:37 +09:00
Merge pull request #284 from nikhilbadyal/feature/280-use-beautifulsoup4-for-scrapping
🎨 Use BS4 for scrapping
This commit is contained in:
@@ -4,5 +4,4 @@ lastversion==3.0.1
|
|||||||
loguru==0.7.0
|
loguru==0.7.0
|
||||||
pre-commit==3.3.3
|
pre-commit==3.3.3
|
||||||
requests==2.31.0
|
requests==2.31.0
|
||||||
selectolax==0.3.16
|
|
||||||
tqdm==4.66.1
|
tqdm==4.66.1
|
||||||
|
|||||||
+69
-60
@@ -1,62 +1,84 @@
|
|||||||
"""Downloader Class."""
|
"""Downloader Class."""
|
||||||
import re
|
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from selectolax.lexbor import LexborHTMLParser
|
|
||||||
|
|
||||||
|
from scripts.status_check import headers
|
||||||
from src.downloader.download import Downloader
|
from src.downloader.download import Downloader
|
||||||
from src.exceptions import AppNotFound
|
from src.exceptions import AppNotFound
|
||||||
|
from src.utils import apkmirror_status_check, bs4_parser
|
||||||
|
|
||||||
|
|
||||||
class ApkMirror(Downloader):
|
class ApkMirror(Downloader):
|
||||||
"""Files downloader."""
|
"""Files downloader."""
|
||||||
|
|
||||||
def extract_download_link(self, page: str, app: str) -> None:
|
def _extract_force_download_link(self, link: str, app: str) -> None:
|
||||||
|
"""Extract force download link."""
|
||||||
|
r = requests.get(link, headers=headers)
|
||||||
|
if r.status_code != 200:
|
||||||
|
raise AppNotFound(f"Unable to connect with {link} on ApkMirror.")
|
||||||
|
soup = BeautifulSoup(r.text, bs4_parser)
|
||||||
|
notes_divs = soup.find(class_="tab-pane")
|
||||||
|
possible_links = notes_divs.find_all("a")
|
||||||
|
for possible_link in possible_links:
|
||||||
|
if possible_link.get("href") and "download.php?id=" in possible_link.get(
|
||||||
|
"href"
|
||||||
|
):
|
||||||
|
return self._download(
|
||||||
|
self.config.apk_mirror + possible_link["href"], f"{app}.apk"
|
||||||
|
)
|
||||||
|
raise AppNotFound(f"Unable to download apk from {link}")
|
||||||
|
|
||||||
|
def extract_download_link(self, main_page: str, app: str) -> None:
|
||||||
"""Function to extract the download link from apkmirror html page.
|
"""Function to extract the download link from apkmirror html page.
|
||||||
|
|
||||||
:param page: Url of the page
|
:param main_page: Url of the page
|
||||||
:param app: Name of the app
|
:param app: Name of the app
|
||||||
"""
|
"""
|
||||||
logger.debug(f"Extracting download link from\n{page}")
|
logger.debug(f"Extracting download link from\n{main_page}")
|
||||||
parser = LexborHTMLParser(self.config.session.get(page).text)
|
r = requests.get(main_page, headers=headers)
|
||||||
|
if r.status_code != 200:
|
||||||
resp = self.config.session.get(
|
raise AppNotFound(f"Unable to connect with {main_page} on ApkMirror.")
|
||||||
self.config.apk_mirror + parser.css_first("a.accent_bg").attributes["href"]
|
soup = BeautifulSoup(r.text, bs4_parser)
|
||||||
|
download_button = soup.find(class_="center")
|
||||||
|
download_links = download_button.find_all("a")
|
||||||
|
final_download_link = None
|
||||||
|
for download_link in download_links:
|
||||||
|
if download_link.get("href"):
|
||||||
|
if "download/?key=" in download_link.get("href"):
|
||||||
|
final_download_link = download_link["href"]
|
||||||
|
break
|
||||||
|
if not final_download_link:
|
||||||
|
raise AppNotFound(f"Unable to download apk from {main_page}")
|
||||||
|
self._extract_force_download_link(
|
||||||
|
self.config.apk_mirror + final_download_link, app
|
||||||
)
|
)
|
||||||
parser = LexborHTMLParser(resp.text)
|
|
||||||
|
|
||||||
href = parser.css_first(
|
def get_download_page(self, main_page: str) -> str:
|
||||||
"p.notes:nth-child(3) > span:nth-child(1) > a:nth-child(1)"
|
|
||||||
).attributes["href"]
|
|
||||||
self._download(self.config.apk_mirror + href, f"{app}.apk")
|
|
||||||
|
|
||||||
def get_download_page(self, parser: LexborHTMLParser, main_page: str) -> str:
|
|
||||||
"""Function to get the download page in apk_mirror.
|
"""Function to get the download page in apk_mirror.
|
||||||
|
|
||||||
:param parser: Parser
|
:param parser: Parser
|
||||||
:param main_page: Main Download Page in APK mirror(Index)
|
:param main_page: Main Download Page in APK mirror(Index)
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
logger.debug(f"Getting download page from {main_page}")
|
r = requests.get(main_page, headers=headers)
|
||||||
apm = parser.css(".apkm-badge")
|
if r.status_code != 200:
|
||||||
sub_url = ""
|
raise AppNotFound(f"Unable to connect with {main_page} on ApkMirror.")
|
||||||
for is_apm in apm:
|
soup = BeautifulSoup(r.text, bs4_parser)
|
||||||
parent_text = is_apm.parent.parent.text()
|
list_widget = soup.find(class_="listWidget")
|
||||||
if "APK" in is_apm.text() and (
|
table_rows = list_widget.find_all(class_="table-row")
|
||||||
"arm64-v8a" in parent_text
|
sub_url = None
|
||||||
or "universal" in parent_text
|
for row in table_rows:
|
||||||
or "noarch" in parent_text
|
if row.find(class_="accent_color"):
|
||||||
):
|
apk_type = row.find(class_="apkm-badge").get_text()
|
||||||
parser = is_apm.parent
|
if apk_type == "APK":
|
||||||
sub_url = parser.css_first(".accent_color").attributes["href"]
|
sub_url = row.find(class_="accent_color")["href"]
|
||||||
break
|
break
|
||||||
if sub_url == "":
|
if not sub_url:
|
||||||
logger.exception(
|
raise AppNotFound("Unable to download apk from APKMirror.")
|
||||||
f"Unable to find any apk on apkmirror_specific_version on {main_page}"
|
return f"{self.config.apk_mirror}{sub_url}"
|
||||||
)
|
|
||||||
raise AppNotFound("Unable to find apk on apkmirror site.")
|
|
||||||
return self.config.apk_mirror + sub_url
|
|
||||||
|
|
||||||
def specific_version(self, app: str, version: str) -> None:
|
def specific_version(self, app: str, version: str) -> None:
|
||||||
"""Function to download the specified version of app from apkmirror.
|
"""Function to download the specified version of app from apkmirror.
|
||||||
@@ -67,10 +89,7 @@ class ApkMirror(Downloader):
|
|||||||
"""
|
"""
|
||||||
version = version.replace(".", "-")
|
version = version.replace(".", "-")
|
||||||
main_page = f"{self.config.apk_mirror_version_urls.get(app)}-{version}-release/"
|
main_page = f"{self.config.apk_mirror_version_urls.get(app)}-{version}-release/"
|
||||||
parser = LexborHTMLParser(
|
download_page = self.get_download_page(main_page)
|
||||||
self.config.session.get(main_page, allow_redirects=True).text
|
|
||||||
)
|
|
||||||
download_page = self.get_download_page(parser, main_page)
|
|
||||||
self.extract_download_link(download_page, app)
|
self.extract_download_link(download_page, app)
|
||||||
|
|
||||||
def latest_version(self, app: str, **kwargs: Any) -> None:
|
def latest_version(self, app: str, **kwargs: Any) -> None:
|
||||||
@@ -80,24 +99,14 @@ class ApkMirror(Downloader):
|
|||||||
:param app: Name of the application
|
:param app: Name of the application
|
||||||
:return: Version of downloaded apk
|
:return: Version of downloaded apk
|
||||||
"""
|
"""
|
||||||
logger.debug(f"Trying to download {app}'s latest version from apkmirror")
|
from src.patches import Patches
|
||||||
page = self.config.apk_mirror_urls.get(app)
|
|
||||||
if not page:
|
package_name = Patches.get_package_name(app)
|
||||||
logger.debug("Invalid app")
|
response = apkmirror_status_check(package_name)
|
||||||
raise AppNotFound("Invalid app")
|
if response["data"][0]["exists"]:
|
||||||
parser = LexborHTMLParser(self.config.session.get(page).text)
|
version = response["data"][0]["release"]["version"]
|
||||||
try:
|
logger.debug(
|
||||||
main_page = parser.css_first(".appRowVariantTag>.accent_color").attributes[
|
f"Trying to download {app}'s latest version({version}) from apkmirror"
|
||||||
"href"
|
)
|
||||||
]
|
return self.specific_version(app, version)
|
||||||
except AttributeError:
|
raise AppNotFound("App not found on apkmirror.")
|
||||||
# Handles a case when variants are not available
|
|
||||||
main_page = parser.css_first(".downloadLink").attributes["href"]
|
|
||||||
match = re.search(r"\d", main_page)
|
|
||||||
if not match:
|
|
||||||
logger.error("Cannot find app main page")
|
|
||||||
raise AppNotFound()
|
|
||||||
main_page = f"{self.config.apk_mirror}{main_page}"
|
|
||||||
parser = LexborHTMLParser(self.config.session.get(main_page).text)
|
|
||||||
download_page = self.get_download_page(parser, main_page)
|
|
||||||
self.extract_download_link(download_page, app)
|
|
||||||
|
|||||||
@@ -1,10 +1,13 @@
|
|||||||
"""APK SOS Downloader Class."""
|
"""APK SOS Downloader Class."""
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from loguru import logger
|
import requests
|
||||||
from selectolax.lexbor import LexborHTMLParser
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
from scripts.status_check import headers
|
||||||
from src.downloader.download import Downloader
|
from src.downloader.download import Downloader
|
||||||
|
from src.exceptions import AppNotFound
|
||||||
|
from src.utils import bs4_parser
|
||||||
|
|
||||||
|
|
||||||
class ApkSos(Downloader):
|
class ApkSos(Downloader):
|
||||||
@@ -16,13 +19,14 @@ class ApkSos(Downloader):
|
|||||||
:param page: Url of the page
|
:param page: Url of the page
|
||||||
:param app: Name of the app
|
:param app: Name of the app
|
||||||
"""
|
"""
|
||||||
parser = LexborHTMLParser(self.config.session.get(page).text)
|
r = requests.get(page, headers=headers, allow_redirects=True)
|
||||||
download_url = parser.css_first(
|
soup = BeautifulSoup(r.text, bs4_parser)
|
||||||
r"body > div > div > div > div > div.col-sm-12.col-md-8 > div.card.fluid.\.idma > "
|
download_button = soup.find(class_="col-sm-12 col-md-8 text-center")
|
||||||
"div.section.row > div.col-sm-12.col-md-8.text-center > p > a"
|
possible_links = download_button.find_all("a")
|
||||||
).attributes["href"]
|
for possible_link in possible_links:
|
||||||
self._download(download_url, f"{app}.apk")
|
if possible_link.get("href"):
|
||||||
logger.debug(f"Downloaded {app} apk from apk_combo_downloader in rt")
|
return self._download(possible_link["href"], f"{app}.apk")
|
||||||
|
raise AppNotFound("Unable to download apk from apk_combo")
|
||||||
|
|
||||||
def latest_version(self, app: str, **kwargs: Any) -> None:
|
def latest_version(self, app: str, **kwargs: Any) -> None:
|
||||||
"""Function to download whatever the latest version of app from
|
"""Function to download whatever the latest version of app from
|
||||||
|
|||||||
@@ -1,10 +1,11 @@
|
|||||||
"""Upto Down Downloader."""
|
"""Upto Down Downloader."""
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from selectolax.lexbor import LexborHTMLParser
|
|
||||||
|
|
||||||
|
from scripts.status_check import headers
|
||||||
from src.downloader.download import Downloader
|
from src.downloader.download import Downloader
|
||||||
from src.exceptions import AppNotFound
|
from src.exceptions import AppNotFound
|
||||||
from src.utils import bs4_parser
|
from src.utils import bs4_parser
|
||||||
@@ -14,9 +15,12 @@ class UptoDown(Downloader):
|
|||||||
"""Files downloader."""
|
"""Files downloader."""
|
||||||
|
|
||||||
def extract_download_link(self, page: str, app: str) -> None:
|
def extract_download_link(self, page: str, app: str) -> None:
|
||||||
parser = LexborHTMLParser(self.config.session.get(page).text)
|
r = requests.get(page, headers=headers, allow_redirects=True)
|
||||||
main_page = parser.css_first("#detail-download-button")
|
soup = BeautifulSoup(r.text, bs4_parser)
|
||||||
download_url = main_page.attributes["data-url"]
|
soup = soup.find(id="detail-download-button")
|
||||||
|
download_url = soup.get("data-url")
|
||||||
|
if not download_url:
|
||||||
|
raise AppNotFound("Unable to download from uptodown.")
|
||||||
self._download(download_url, f"{app}.apk")
|
self._download(download_url, f"{app}.apk")
|
||||||
logger.debug(f"Downloaded {app} apk from upto_down_downloader in rt")
|
logger.debug(f"Downloaded {app} apk from upto_down_downloader in rt")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user