Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add list #226

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions google_play_scraper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
from .features.permissions import permissions # noqa: F401
from .features.reviews import reviews, reviews_all # noqa: F401
from .features.search import search # noqa: F401
from .features.list import list  # noqa: F401
17 changes: 16 additions & 1 deletion google_play_scraper/constants/element.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from datetime import datetime
from typing import Any, Callable, List, Optional
from urllib.parse import urljoin

from google_play_scraper.utils import nested_lookup
from google_play_scraper.utils.data_processors import unescape_text

from google_play_scraper.constants.request import PLAY_STORE_BASE_URL

class ElementSpec:
def __init__(
Expand Down Expand Up @@ -240,3 +241,17 @@ class ElementSpecs:
"developer": ElementSpec(None, [0, 14]),
"installs": ElementSpec(None, [0, 15]),
}

# Field table for a single app entry of a list result: each output key maps to
# the ElementSpec used to pull that value out of the entry.
# NOTE(review): ElementSpec's signature is not visible here; the arguments look
# like (unused selector, index path into the entry, optional post-processing
# callable) — confirm against the ElementSpec definition.
# NOTE(review): this name is the same as the `List` imported from `typing` at
# the top of the module — confirm nothing later in the enclosing scope expects
# the typing alias.
List = {
    "title": ElementSpec(None, [0, 3]),
    "appId": ElementSpec(None, [0, 0, 0]),
    # The extracted value is joined onto PLAY_STORE_BASE_URL with urljoin.
    "url": ElementSpec(None, [0, 10, 4, 2], lambda path: urljoin(PLAY_STORE_BASE_URL, path)),
    "icon": ElementSpec(None, [0, 1, 3, 2]),
    "developer": ElementSpec(None, [0, 14]),
    "currency": ElementSpec(None, [0, 8, 1, 0, 1]),
    # "price" and "free" both read the raw value at [0, 8, 1, 0, 0].
    # price divides it by 1,000,000 (presumably the raw value is in
    # micro-units of the currency — TODO confirm).
    "price": ElementSpec(None, [0, 8, 1, 0, 0], lambda price: price / 1000000),
    # free is True exactly when that same raw value equals 0.
    "free": ElementSpec(None, [0, 8, 1, 0, 0], lambda price: price == 0),
    "summary": ElementSpec(None, [0, 13, 1]),
    "scoreText": ElementSpec(None, [0, 4, 0]),
    "score": ElementSpec(None, [0, 4, 1])
}
61 changes: 61 additions & 0 deletions google_play_scraper/constants/google_play.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,64 @@ class Device(int, Enum):
TABLET = 3
CHROMEBOOK = 5
TV = 6

class Collection(str, Enum):
    """Chart collections that can be requested, keyed by a readable name.

    Members are also ``str`` instances, so each one compares equal to its
    raw identifier string.
    """

    TOP_FREE = "topselling_free"
    TOP_PAID = "topselling_paid"
    GROSSING = "topgrossing"

class Category(str, Enum):
    """Category identifiers that can be requested.

    Every member's value is the same text as its name, and members are
    ``str`` instances, so they compare equal to the raw identifier string.
    """

    # Application categories.
    APPLICATION = "APPLICATION"
    ANDROID_WEAR = "ANDROID_WEAR"
    ART_AND_DESIGN = "ART_AND_DESIGN"
    AUTO_AND_VEHICLES = "AUTO_AND_VEHICLES"
    BEAUTY = "BEAUTY"
    BOOKS_AND_REFERENCE = "BOOKS_AND_REFERENCE"
    BUSINESS = "BUSINESS"
    COMICS = "COMICS"
    COMMUNICATION = "COMMUNICATION"
    DATING = "DATING"
    EDUCATION = "EDUCATION"
    ENTERTAINMENT = "ENTERTAINMENT"
    EVENTS = "EVENTS"
    FINANCE = "FINANCE"
    FOOD_AND_DRINK = "FOOD_AND_DRINK"
    HEALTH_AND_FITNESS = "HEALTH_AND_FITNESS"
    HOUSE_AND_HOME = "HOUSE_AND_HOME"
    LIBRARIES_AND_DEMO = "LIBRARIES_AND_DEMO"
    LIFESTYLE = "LIFESTYLE"
    MAPS_AND_NAVIGATION = "MAPS_AND_NAVIGATION"
    MEDICAL = "MEDICAL"
    MUSIC_AND_AUDIO = "MUSIC_AND_AUDIO"
    NEWS_AND_MAGAZINES = "NEWS_AND_MAGAZINES"
    PARENTING = "PARENTING"
    PERSONALIZATION = "PERSONALIZATION"
    PHOTOGRAPHY = "PHOTOGRAPHY"
    PRODUCTIVITY = "PRODUCTIVITY"
    SHOPPING = "SHOPPING"
    SOCIAL = "SOCIAL"
    SPORTS = "SPORTS"
    TOOLS = "TOOLS"
    TRAVEL_AND_LOCAL = "TRAVEL_AND_LOCAL"
    VIDEO_PLAYERS = "VIDEO_PLAYERS"
    WATCH_FACE = "WATCH_FACE"
    WEATHER = "WEATHER"

    # Game categories.
    GAME = "GAME"
    GAME_ACTION = "GAME_ACTION"
    GAME_ADVENTURE = "GAME_ADVENTURE"
    GAME_ARCADE = "GAME_ARCADE"
    GAME_BOARD = "GAME_BOARD"
    GAME_CARD = "GAME_CARD"
    GAME_CASINO = "GAME_CASINO"
    GAME_CASUAL = "GAME_CASUAL"
    GAME_EDUCATIONAL = "GAME_EDUCATIONAL"
    GAME_MUSIC = "GAME_MUSIC"
    GAME_PUZZLE = "GAME_PUZZLE"
    GAME_RACING = "GAME_RACING"
    GAME_ROLE_PLAYING = "GAME_ROLE_PLAYING"
    GAME_SIMULATION = "GAME_SIMULATION"
    GAME_SPORTS = "GAME_SPORTS"
    GAME_STRATEGY = "GAME_STRATEGY"
    GAME_TRIVIA = "GAME_TRIVIA"
    GAME_WORD = "GAME_WORD"

    # Family category.
    FAMILY = "FAMILY"
1 change: 1 addition & 0 deletions google_play_scraper/constants/regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ class Regex:
# Captures a key that starts with "ds:" and runs (lazily) up to, but not
# including, the next single quote.
KEY = re.compile("(ds:.*?)'")
# Captures the text between "data:" and the literal terminator
# ", sideChannel: {}});</" (lazy, may span multiple lines).
VALUE = re.compile(r"data:([\s\S]*?), sideChannel: {}}\);<\/")
# Captures everything that follows the ")]}'" prefix and two newlines.
REVIEWS = re.compile(r"\)]}'\n\n([\s\S]+)")
# After the ")]}'" prefix and two newlines, skips a line consisting of digits
# and captures the text that follows, lazily, up to the next newline.
LIST = re.compile(r"\)]}'\n\n\d+\n([\s\S]+?)\n")
# Same pattern as REVIEWS: everything after the ")]}'" prefix and two newlines.
PERMISSIONS = re.compile(r"\)]}'\n\n([\s\S]+)")
18 changes: 18 additions & 0 deletions google_play_scraper/constants/request.py

Large diffs are not rendered by default.

33 changes: 33 additions & 0 deletions google_play_scraper/features/list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import json
from typing import List, Dict

from google_play_scraper.constants.element import ElementSpecs
from google_play_scraper.constants.regex import Regex
from google_play_scraper.constants.request import Formats
from google_play_scraper.utils.request import post
from google_play_scraper.constants.google_play import Collection, Category

def list(
    lang: str = "en",
    country: str = "us",
    num: int = 100,
    collection: Collection = Collection.TOP_FREE,
    category: Category = Category.APPLICATION,
) -> List[Dict]:
    """Request one collection/category listing and return a dict per app.

    Args:
        lang: Passed to ``Formats.List.build`` when building the request URL.
        country: Passed to ``Formats.List.build`` when building the request URL.
        num: Passed to ``Formats.List.build_body`` as ``num``.
        collection: Collection whose ``.value`` is placed in the request body.
        category: Category whose ``.value`` is placed in the request body.

    Returns:
        One dict per entry of the decoded response container, with the keys
        of ``ElementSpecs.List`` and the values extracted by each spec.

    Raises:
        IndexError: If ``Regex.LIST`` finds no match in the response text.
    """
    endpoint = Formats.List.build(lang=lang, country=country)
    payload = Formats.List.build_body(
        num=num, collection=collection.value, category=category.value
    )
    headers = {'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8'}
    response_text = post(endpoint, payload, headers)

    # The first regex capture is a JSON document; element [0][2] of it is a
    # JSON-encoded string that has to be decoded a second time.
    envelope = json.loads(Regex.LIST.findall(response_text)[0])
    inner = json.loads(envelope[0][2])
    app_entries = inner[0][1][0][28][0]

    # This function is itself named ``list``, so the builtin list() is not
    # reachable by that name in this module; build the result with
    # comprehensions instead.
    return [
        {
            field: spec.extract_content(entry)
            for field, spec in ElementSpecs.List.items()
        }
        for entry in app_entries
    ]