From 9571861cd031b1ac3b3c63448846d9391b0ad95a Mon Sep 17 00:00:00 2001 From: Nikhil Badyal Date: Tue, 15 Aug 2023 15:32:09 +0530 Subject: [PATCH 1/4] =?UTF-8?q?=F0=9F=8E=A8=20Use=20BS4=20for=20scrapping?= =?UTF-8?q?=20APKMirror?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/downloader/apkmirror.py | 129 +++++++++++++++++++----------------- 1 file changed, 69 insertions(+), 60 deletions(-) diff --git a/src/downloader/apkmirror.py b/src/downloader/apkmirror.py index 43624bb..9ee0f3b 100644 --- a/src/downloader/apkmirror.py +++ b/src/downloader/apkmirror.py @@ -1,62 +1,84 @@ """Downloader Class.""" -import re from typing import Any +import requests +from bs4 import BeautifulSoup from loguru import logger -from selectolax.lexbor import LexborHTMLParser +from scripts.status_check import headers from src.downloader.download import Downloader from src.exceptions import AppNotFound +from src.utils import apkmirror_status_check, bs4_parser class ApkMirror(Downloader): """Files downloader.""" - def extract_download_link(self, page: str, app: str) -> None: + def _extract_force_download_link(self, link: str, app: str) -> None: + """Extract force download link.""" + r = requests.get(link, headers=headers) + if r.status_code != 200: + raise AppNotFound(f"Unable to connect with {link} on ApkMirror.") + soup = BeautifulSoup(r.text, bs4_parser) + notes_divs = soup.find(class_="tab-pane") + possible_links = notes_divs.find_all("a") + for possible_link in possible_links: + if possible_link.get("href") and "download.php?id=" in possible_link.get( + "href" + ): + return self._download( + self.config.apk_mirror + possible_link["href"], f"{app}.apk" + ) + raise AppNotFound(f"Unable to download apk from {link}") + + def extract_download_link(self, main_page: str, app: str) -> None: """Function to extract the download link from apkmirror html page. - :param page: Url of the page + :param main_page: Url of the page :param app: Name of the app """ - logger.debug(f"Extracting download link from\n{page}") - parser = LexborHTMLParser(self.config.session.get(page).text) - - resp = self.config.session.get( - self.config.apk_mirror + parser.css_first("a.accent_bg").attributes["href"] + logger.debug(f"Extracting download link from\n{main_page}") + r = requests.get(main_page, headers=headers) + if r.status_code != 200: + raise AppNotFound(f"Unable to connect with {main_page} on ApkMirror.") + soup = BeautifulSoup(r.text, bs4_parser) + download_button = soup.find(class_="center") + download_links = download_button.find_all("a") + final_download_link = None + for download_link in download_links: + if download_link.get("href"): + if "download/?key=" in download_link.get("href"): + final_download_link = download_link["href"] + break + if not final_download_link: + raise AppNotFound(f"Unable to download apk from {main_page}") + self._extract_force_download_link( + self.config.apk_mirror + final_download_link, app ) - parser = LexborHTMLParser(resp.text) - href = parser.css_first( - "p.notes:nth-child(3) > span:nth-child(1) > a:nth-child(1)" - ).attributes["href"] - self._download(self.config.apk_mirror + href, f"{app}.apk") - - def get_download_page(self, parser: LexborHTMLParser, main_page: str) -> str: + def get_download_page(self, main_page: str) -> str: """Function to get the download page in apk_mirror. :param parser: Parser :param main_page: Main Download Page in APK mirror(Index) :return: """ - logger.debug(f"Getting download page from {main_page}") - apm = parser.css(".apkm-badge") - sub_url = "" - for is_apm in apm: - parent_text = is_apm.parent.parent.text() - if "APK" in is_apm.text() and ( - "arm64-v8a" in parent_text - or "universal" in parent_text - or "noarch" in parent_text - ): - parser = is_apm.parent - sub_url = parser.css_first(".accent_color").attributes["href"] - break - if sub_url == "": - logger.exception( - f"Unable to find any apk on apkmirror_specific_version on {main_page}" - ) - raise AppNotFound("Unable to find apk on apkmirror site.") - return self.config.apk_mirror + sub_url + r = requests.get(main_page, headers=headers) + if r.status_code != 200: + raise AppNotFound(f"Unable to connect with {main_page} on ApkMirror.") + soup = BeautifulSoup(r.text, bs4_parser) + list_widget = soup.find(class_="listWidget") + table_rows = list_widget.find_all(class_="table-row") + sub_url = None + for row in table_rows: + if row.find(class_="accent_color"): + apk_type = row.find(class_="apkm-badge").get_text() + if apk_type == "APK": + sub_url = row.find(class_="accent_color")["href"] + break + if not sub_url: + raise AppNotFound("Unable to download apk from APKMirror.") + return f"{self.config.apk_mirror}{sub_url}" def specific_version(self, app: str, version: str) -> None: """Function to download the specified version of app from apkmirror. @@ -67,10 +89,7 @@ class ApkMirror(Downloader): """ version = version.replace(".", "-") main_page = f"{self.config.apk_mirror_version_urls.get(app)}-{version}-release/" - parser = LexborHTMLParser( - self.config.session.get(main_page, allow_redirects=True).text - ) - download_page = self.get_download_page(parser, main_page) + download_page = self.get_download_page(main_page) self.extract_download_link(download_page, app) def latest_version(self, app: str, **kwargs: Any) -> None: @@ -80,24 +99,14 @@ class ApkMirror(Downloader): :param app: Name of the application :return: Version of downloaded apk """ - logger.debug(f"Trying to download {app}'s latest version from apkmirror") - page = self.config.apk_mirror_urls.get(app) - if not page: - logger.debug("Invalid app") - raise AppNotFound("Invalid app") - parser = LexborHTMLParser(self.config.session.get(page).text) - try: - main_page = parser.css_first(".appRowVariantTag>.accent_color").attributes[ - "href" - ] - except AttributeError: - # Handles a case when variants are not available - main_page = parser.css_first(".downloadLink").attributes["href"] - match = re.search(r"\d", main_page) - if not match: - logger.error("Cannot find app main page") - raise AppNotFound() - main_page = f"{self.config.apk_mirror}{main_page}" - parser = LexborHTMLParser(self.config.session.get(main_page).text) - download_page = self.get_download_page(parser, main_page) - self.extract_download_link(download_page, app) + from src.patches import Patches + + package_name = Patches.get_package_name(app) + response = apkmirror_status_check(package_name) + if response["data"][0]["exists"]: + version = response["data"][0]["release"]["version"] + logger.debug( + f"Trying to download {app}'s latest version({version}) from apkmirror" + ) + return self.specific_version(app, version) + raise AppNotFound("App not found on apkmirror.") From 5c4cd4dcdca7163d668947a15a047b0f8d6482cd Mon Sep 17 00:00:00 2001 From: Nikhil Badyal Date: Tue, 15 Aug 2023 19:00:50 +0530 Subject: [PATCH 2/4] =?UTF-8?q?=F0=9F=8E=A8=20Use=20BS4=20for=20apksos?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/downloader/apksos.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/downloader/apksos.py b/src/downloader/apksos.py index bb6479b..7848d12 100644 --- a/src/downloader/apksos.py +++ b/src/downloader/apksos.py @@ -1,10 +1,13 @@ """APK SOS Downloader Class.""" from typing import Any -from loguru import logger -from selectolax.lexbor import LexborHTMLParser +import requests +from bs4 import BeautifulSoup +from scripts.status_check import headers from src.downloader.download import Downloader +from src.exceptions import AppNotFound +from src.utils import bs4_parser class ApkSos(Downloader): @@ -16,13 +19,14 @@ class ApkSos(Downloader): :param page: Url of the page :param app: Name of the app """ - parser = LexborHTMLParser(self.config.session.get(page).text) - download_url = parser.css_first( - r"body > div > div > div > div > div.col-sm-12.col-md-8 > div.card.fluid.\.idma > " - "div.section.row > div.col-sm-12.col-md-8.text-center > p > a" - ).attributes["href"] - self._download(download_url, f"{app}.apk") - logger.debug(f"Downloaded {app} apk from apk_combo_downloader in rt") + r = requests.get(page, headers=headers, allow_redirects=True) + soup = BeautifulSoup(r.text, bs4_parser) + download_button = soup.find(class_="col-sm-12 col-md-8 text-center") + possible_links = download_button.find_all("a") + for possible_link in possible_links: + if possible_link.get("href"): + return self._download(possible_link["href"], f"{app}.apk") + raise AppNotFound("Unable to download apk from apk_combo") def latest_version(self, app: str, **kwargs: Any) -> None: """Function to download whatever the latest version of app from From 887466814ca0a67e439fdb31771fd3719582fd53 Mon Sep 17 00:00:00 2001 From: Nikhil Badyal Date: Tue, 15 Aug 2023 19:25:37 +0530 Subject: [PATCH 3/4] =?UTF-8?q?=F0=9F=8E=A8=20Use=20BS4=20for=20uptodown?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/downloader/uptodown.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/downloader/uptodown.py b/src/downloader/uptodown.py index b24ee79..30d8240 100644 --- a/src/downloader/uptodown.py +++ b/src/downloader/uptodown.py @@ -1,10 +1,11 @@ """Upto Down Downloader.""" from typing import Any +import requests from bs4 import BeautifulSoup from loguru import logger -from selectolax.lexbor import LexborHTMLParser +from scripts.status_check import headers from src.downloader.download import Downloader from src.exceptions import AppNotFound from src.utils import bs4_parser @@ -14,9 +15,12 @@ class UptoDown(Downloader): """Files downloader.""" def extract_download_link(self, page: str, app: str) -> None: - parser = LexborHTMLParser(self.config.session.get(page).text) - main_page = parser.css_first("#detail-download-button") - download_url = main_page.attributes["data-url"] + r = requests.get(page, headers=headers, allow_redirects=True) + soup = BeautifulSoup(r.text, bs4_parser) + soup = soup.find(id="detail-download-button") + download_url = soup.get("data-url") + if not download_url: + raise AppNotFound("Unable to download from uptodown.") self._download(download_url, f"{app}.apk") logger.debug(f"Downloaded {app} apk from upto_down_downloader in rt") From 4e88ce14ce3329ba0e6ee4a1ff06e1ce8b0e40bf Mon Sep 17 00:00:00 2001 From: Nikhil Badyal Date: Tue, 15 Aug 2023 19:28:23 +0530 Subject: [PATCH 4/4] =?UTF-8?q?=E2=9E=96=20Removed=20selectolax=20from=20r?= =?UTF-8?q?eq?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 84a3dea..2fdd342 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,5 +4,4 @@ lastversion==3.0.1 loguru==0.7.0 pre-commit==3.3.3 requests==2.31.0 -selectolax==0.3.16 tqdm==4.66.1