🐛 Status Scrapper not scraping icon from apkcombo (#320)

This commit is contained in:
Nikhil Badyal
2023-08-26 15:17:32 +05:30
committed by GitHub
parent 048f1affd8
commit 216b498a81
3 changed files with 58 additions and 31 deletions
+46 -26
View File
@@ -4,37 +4,48 @@ from pathlib import Path
from typing import List from typing import List
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup, Tag
from google_play_scraper import app as gplay_app from google_play_scraper import app as gplay_app
from google_play_scraper.exceptions import GooglePlayScraperException from google_play_scraper.exceptions import GooglePlayScraperException
from src.downloader.sources import (
APK_COMBO_GENERIC_URL,
APK_MIRROR_BASE_URL,
APK_MIRROR_PACKAGE_URL,
PLAY_STORE_APK_URL,
not_found_icon,
revanced_api,
)
from src.exceptions import APKComboIconScrapError, APKMirrorIconScrapError, UnknownError from src.exceptions import APKComboIconScrapError, APKMirrorIconScrapError, UnknownError
from src.patches import Patches from src.patches import Patches
from src.utils import apk_mirror_base_url, apkmirror_status_check, bs4_parser, handle_request_response, request_header from src.utils import apkmirror_status_check, bs4_parser, handle_request_response, request_header
not_found_icon = "https://img.icons8.com/bubbles/500/android-os.png"
no_of_col = 6 no_of_col = 6
combo_headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/116.0"}
def apkcombo_scrapper(package_name: str) -> str: def apkcombo_scrapper(package_name: str) -> str:
"""Apkcombo scrapper.""" """Apkcombo scrapper."""
apkcombo_url = APK_COMBO_GENERIC_URL.format(package_name)
try: try:
apkcombo_url = f"https://apkcombo.com/genericApp/{package_name}" r = requests.get(apkcombo_url, headers=combo_headers, allow_redirects=True, timeout=60)
r = requests.get(apkcombo_url, headers=request_header, allow_redirects=True, timeout=10)
soup = BeautifulSoup(r.text, bs4_parser) soup = BeautifulSoup(r.text, bs4_parser)
icon_element = soup.select_one("div.bubble-wrap > img") avatar = soup.find(class_="avatar")
if not icon_element: if not isinstance(avatar, Tag):
raise APKComboIconScrapError(url=apkcombo_url) raise APKComboIconScrapError(url=apkcombo_url)
url = icon_element["data-src"] icon_element = avatar.find("img")
if not isinstance(icon_element, Tag):
raise APKComboIconScrapError(url=apkcombo_url)
url = icon_element.get("data-src")
return re.sub(r"=.*$", "", url) # type: ignore[arg-type] return re.sub(r"=.*$", "", url) # type: ignore[arg-type]
except UnknownError: except UnknownError as e:
return not_found_icon raise APKComboIconScrapError(url=apkcombo_url) from e
def apkmirror_scrapper(package_name: str) -> str: def apkmirror_scrapper(package_name: str) -> str:
"""Apkmirror URL.""" """Apkmirror URL."""
response = apkmirror_status_check(package_name) response = apkmirror_status_check(package_name)
search_url = f"{apk_mirror_base_url}/?s={package_name}" search_url = APK_MIRROR_PACKAGE_URL.format(package_name)
if response["data"][0]["exists"]: if response["data"][0]["exists"]:
return _extracted_from_apkmirror_scrapper(search_url) return _extracted_from_apkmirror_scrapper(search_url)
raise APKMirrorIconScrapError(url=search_url) raise APKMirrorIconScrapError(url=search_url)
@@ -54,32 +65,42 @@ def _extracted_from_apkmirror_scrapper(search_url: str) -> str:
# regular expression pattern to match w=xx&h=xx&q=xx # regular expression pattern to match w=xx&h=xx&q=xx
pattern = r"(w=\d+&h=\d+&q=\d+)" pattern = r"(w=\d+&h=\d+&q=\d+)"
return apk_mirror_base_url + re.sub(pattern, f"w={new_width}&h={new_height}&q={new_quality}", sub_url) return APK_MIRROR_BASE_URL + re.sub(pattern, f"w={new_width}&h={new_height}&q={new_quality}", sub_url)
def gplay_icon_scrapper(package_name: str) -> str: def gplay_icon_scrapper(package_name: str) -> str:
"""Scrap Icon from Gplay.""" """Scrap Icon from Gplay."""
# noinspection PyBroadException # noinspection PyBroadException
try: try:
result = gplay_app( return str(
gplay_app(
package_name, package_name,
)["icon"]
) )
if result["icon"]: except UnknownError as e:
return str(result["icon"]) raise GooglePlayScraperException from e
raise GooglePlayScraperException
def icon_scrapper(package_name: str) -> str:
"""Scrap Icon."""
try:
return gplay_icon_scrapper(package_name)
except GooglePlayScraperException: except GooglePlayScraperException:
try: try:
return apkmirror_scrapper(package_name) return apkmirror_scrapper(package_name)
except APKMirrorIconScrapError: except APKMirrorIconScrapError:
try:
return apkcombo_scrapper(package_name) return apkcombo_scrapper(package_name)
except APKComboIconScrapError:
return not_found_icon
except UnknownError: except UnknownError:
return not_found_icon return not_found_icon
def generate_markdown_table(data: List[List[str]]) -> str: def generate_markdown_table(data: List[List[str]]) -> str:
"""Generate table.""" """Generate markdown table."""
if not data: if not data:
return "No data to generate table." return "No data to generate for the table."
table = ( table = (
"| Package Name | App Icon | PlayStore link | APKMirror link|APKCombo Link| Supported?|\n" "| Package Name | App Icon | PlayStore link | APKMirror link|APKCombo Link| Supported?|\n"
@@ -87,7 +108,7 @@ def generate_markdown_table(data: List[List[str]]) -> str:
) )
for row in data: for row in data:
if len(row) != no_of_col: if len(row) != no_of_col:
msg = "Each row must contain 6 columns of data." msg = f"Each row must contain {no_of_col} columns of data."
raise ValueError(msg) raise ValueError(msg)
table += f"| {row[0]} | {row[1]} | {row[2]} | {row[3]} |{row[4]} |{row[5]} |\n" table += f"| {row[0]} | {row[1]} | {row[2]} | {row[3]} |{row[4]} |{row[5]} |\n"
@@ -97,8 +118,7 @@ def generate_markdown_table(data: List[List[str]]) -> str:
def main() -> None: def main() -> None:
"""Entrypoint.""" """Entrypoint."""
repo_url = "https://releases.revanced.app/patches" response = requests.get(revanced_api, timeout=10)
response = requests.get(repo_url, timeout=10)
handle_request_response(response) handle_request_response(response)
patches = response.json() patches = response.json()
@@ -110,14 +130,14 @@ def main() -> None:
supported_app = set(Patches.support_app().keys()) supported_app = set(Patches.support_app().keys())
missing_support = sorted(possible_apps.difference(supported_app)) missing_support = sorted(possible_apps.difference(supported_app))
output = "New app found which aren't supported or outdated.\n\n" output = "New app found which aren't supported.\n\n"
data = [ data = [
[ [
app, app,
f'<img src="{gplay_icon_scrapper(app)}" width=50 height=50>', f'<img src="{icon_scrapper(app)}" width=50 height=50>',
f"[PlayStore Link](https://play.google.com/store/apps/details?id={app})", f"[PlayStore Link]({PLAY_STORE_APK_URL.format(app)})",
f"[APKMirror Link](https://www.apkmirror.com/?s={app})", f"[APKMirror Link]({APK_MIRROR_PACKAGE_URL.format(app)})",
f"[APKCombo Link](https://apkcombo.com/genericApp/{app})", f"[APKCombo Link]({APK_COMBO_GENERIC_URL.format(app)})",
"<li>- [ ] </li>", "<li>- [ ] </li>",
] ]
for app in missing_support for app in missing_support
+8
View File
@@ -1,12 +1,20 @@
"""APK Sources used.""" """APK Sources used."""
APK_MIRROR_BASE_URL = "https://www.apkmirror.com" APK_MIRROR_BASE_URL = "https://www.apkmirror.com"
APK_MIRROR_BASE_APK_URL = f"{APK_MIRROR_BASE_URL}/apk" APK_MIRROR_BASE_APK_URL = f"{APK_MIRROR_BASE_URL}/apk"
APK_MIRROR_PACKAGE_URL = f"{APK_MIRROR_BASE_URL}/?s=" + "{}"
APK_MIRROR_APK_CHECK = f"{APK_MIRROR_BASE_URL}/wp-json/apkm/v1/app_exists/"
UPTODOWN_BASE_URL = "https://{}.en.uptodown.com/android" UPTODOWN_BASE_URL = "https://{}.en.uptodown.com/android"
APK_PURE_BASE_URL = "https://d.apkpure.com/b/APK" APK_PURE_BASE_URL = "https://d.apkpure.com/b/APK"
APK_PURE_URL = APK_PURE_BASE_URL + "/{}?version=latest" APK_PURE_URL = APK_PURE_BASE_URL + "/{}?version=latest"
APKS_SOS_BASE_URL = "https://apksos.com/download-app" APKS_SOS_BASE_URL = "https://apksos.com/download-app"
APK_SOS_URL = APKS_SOS_BASE_URL + "/{}" APK_SOS_URL = APKS_SOS_BASE_URL + "/{}"
GITHUB_BASE_URL = "https://github.com" GITHUB_BASE_URL = "https://github.com"
PLAY_STORE_BASE_URL = "https://play.google.com"
PLAY_STORE_APK_URL = f"{PLAY_STORE_BASE_URL}/store/apps/details?id=" + "{}"
APK_COMBO_BASE_URL = "https://apkcombo.com"
APK_COMBO_GENERIC_URL = APK_COMBO_BASE_URL + "/genericApp/{}"
not_found_icon = "https://img.icons8.com/bubbles/500/android-os.png"
revanced_api = "https://releases.revanced.app/patches"
apk_sources = { apk_sources = {
"backdrops": f"{APK_MIRROR_BASE_APK_URL}/backdrops/backdrops-wallpapers/", "backdrops": f"{APK_MIRROR_BASE_APK_URL}/backdrops/backdrops-wallpapers/",
"bacon": f"{APK_MIRROR_BASE_APK_URL}/onelouder-apps/baconreader-for-reddit/", "bacon": f"{APK_MIRROR_BASE_APK_URL}/onelouder-apps/baconreader-for-reddit/",
+2 -3
View File
@@ -11,6 +11,7 @@ from loguru import logger
from requests import Response from requests import Response
from src.config import RevancedConfig from src.config import RevancedConfig
from src.downloader.sources import APK_MIRROR_APK_CHECK
from src.downloader.utils import status_code_200 from src.downloader.utils import status_code_200
from src.exceptions import DownloadError from src.exceptions import DownloadError
@@ -19,7 +20,6 @@ default_build = [
"youtube_music", "youtube_music",
] ]
possible_archs = ["armeabi-v7a", "x86", "x86_64", "arm64-v8a"] possible_archs = ["armeabi-v7a", "x86", "x86_64", "arm64-v8a"]
apk_mirror_base_url = "https://www.apkmirror.com"
request_header = { request_header = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (HTML, like Gecko)" "AppleWebKit/537.36 (HTML, like Gecko)"
@@ -212,9 +212,8 @@ def apkmirror_status_check(package_name: str) -> Any:
------- -------
the response from the APKMirror API as a JSON object. the response from the APKMirror API as a JSON object.
""" """
api_url = f"{apk_mirror_base_url}/wp-json/apkm/v1/app_exists/"
body = {"pnames": [package_name]} body = {"pnames": [package_name]}
response = requests.post(api_url, json=body, headers=request_header, timeout=60) response = requests.post(APK_MIRROR_APK_CHECK, json=body, headers=request_header, timeout=60)
return response.json() return response.json()