rabbit whole lotta fixes

sigma67 · Jan 17, 2024 · 5bcee26 · 5bcee26
1 parent 10f91b5
commit 5bcee26
Show file tree

Hide file tree

Showing 10 changed files with 168 additions and 59 deletions.
diff --git a/tests/mixins/test_browsing.py b/tests/mixins/test_browsing.py
@@ -11,17 +11,28 @@ def test_get_home(self, yt, yt_auth):
         assert len(result) >= 15
 
     def test_get_artist(self, yt):
-        results = yt.get_artist("MPLAUCmMUZbaYdNH0bEd1PAlAqsA")
-        assert len(results) == 14
+        """The artist page has the expected keys, parsed categories and related artists."""
+        artist = yt.get_artist("MPLAUCmMUZbaYdNH0bEd1PAlAqsA")
+        assert len(artist) == 16
+
+        for k in ["songs", "videos"]:  # Oasis must be credited on the first result of each
+            assert {"id": "UCmMUZbaYdNH0bEd1PAlAqsA", "name": "Oasis"} in artist[k]["results"][0]["artists"]
+        single = artist["singles"]["results"][0]
+        assert len(single["year"]) == 4 and single["year"].isnumeric()
+        assert single["type"] == "Single"
 
         # test correctness of related artists
-        related = results["related"]["results"]
+        related = artist["related"]["results"]
         assert len(
-            [x for x in related if set(x.keys()) == {"browseId", "subscribers", "title", "thumbnails"}]
+            [
+                x
+                for x in related
+                if set(x.keys()) == {"browseId", "subscribers", "title", "thumbnails", "sub_count"}
+            ]
         ) == len(related)
 
-        results = yt.get_artist("UCLZ7tlKC06ResyDmEStSrOw")  # no album year
-        assert len(results) >= 11
+        artist = yt.get_artist("UCLZ7tlKC06ResyDmEStSrOw")  # no album year
+        assert len(artist) >= 11
 
     def test_get_artist_albums(self, yt):
         artist = yt.get_artist("UCAeLFBCQS7FvI8PvBrWvSBg")
@@ -106,6 +117,19 @@ def test_get_album_other_versions(self, yt):
         assert variant["artists"][1]["name"] == "RAYE"
         assert variant["artists"][2] == {"id": "UCb7jnkQW94hzOoWkG14zs4w", "name": "D-Block Europe"}
 
+    def test_get_album_parsing(self, yt):
+        """Track artists are parsed for albums with linked and with unlinked artists."""
+        album = yt.get_album("MPREb_HLU4ajrAzcU")  # Flume - Palaces
+        assert len(targ := album["tracks"][3]["artists"]) == 3  # 4th track: 3 linked artists
+        # all artists should have ids
+        assert len([x["id"] for x in targ if x["id"]]) == 3
+
+        album = yt.get_album("MPREb_M4IdGHS6DyO")  # IMANU - Unfold
+        # the 4th track credits 3 artists that come without links
+        assert len(targ := album["tracks"][3]["artists"]) == 3
+        # at least one id must still be filled in (the album artist)
+        assert len([x["id"] for x in targ if x["id"]]) >= 1
+
     def test_get_song(self, config, yt, yt_oauth, sample_video):
         song = yt_oauth.get_song(config["uploads"]["private_upload_id"])  # private upload
         assert len(song) == 5

diff --git a/ytmusicapi/mixins/browsing.py b/ytmusicapi/mixins/browsing.py
@@ -12,6 +12,7 @@
 from ytmusicapi.parsers.playlists import parse_playlist_items
 
 from ..navigation import *
+from ..parsers._utils import parse_real_count  # protected ?
 from ._protocol import MixinProtocol
 from ._utils import get_datestamp
 
@@ -234,6 +235,7 @@ def get_artist(self, channelId: str) -> Dict:
                 if "subheader" not in descriptionShelf
                 else descriptionShelf["subheader"]["runs"][0]["text"]
             )
+            artist["view_count"] = parse_real_count(nav(descriptionShelf, ["subheader", "runs", 0], True))
         subscription_button = header["subscriptionButton"]["subscribeButtonRenderer"]
         artist["channelId"] = subscription_button["channelId"]
         artist["shuffleId"] = nav(
@@ -243,6 +245,9 @@ def get_artist(self, channelId: str) -> Dict:
             header, ["startRadioButton", "buttonRenderer"] + NAVIGATION_WATCH_PLAYLIST_ID, True
         )
         artist["subscribers"] = nav(subscription_button, ["subscriberCountText", "runs", 0, "text"], True)
+        artist["sub_count"] = parse_real_count(
+            nav(subscription_button, ["subscriberCountText", "runs", 0], True)
+        )
         artist["subscribed"] = subscription_button["subscribed"]
         artist["thumbnails"] = nav(header, THUMBNAILS, True)
         artist["songs"] = {"browseId": None}
@@ -494,7 +499,7 @@ def get_album(self, browseId: str) -> Dict:
         response = self._send_request(endpoint, body)
         album = parse_album_header(response)
         results = nav(response, SINGLE_COLUMN_TAB + SECTION_LIST_ITEM + MUSIC_SHELF)
-        album["tracks"] = parse_playlist_items(results["contents"], is_album=True)
+        album["tracks"] = parse_playlist_items(results["contents"], by_artists=album["artists"])
         results = nav(response, SINGLE_COLUMN_TAB + SECTION_LIST + [1] + CAROUSEL, True)
         if results is not None:
             album["other_versions"] = parse_content_list(results["contents"], parse_album)

diff --git a/ytmusicapi/navigation.py b/ytmusicapi/navigation.py
@@ -2,7 +2,9 @@
 from typing import Any, Dict, List, Literal, Optional, overload
 
 CONTENT = ["contents", 0]
-RUN_TEXT = ["runs", 0, "text"]
+ZTEXT = [0, "text"]
+TTEXT = [2, "text"]
+RUN_TEXT = ["runs"] + ZTEXT
 TAB_CONTENT = ["tabs", 0, "tabRenderer", "content"]
 TAB_1_CONTENT = ["tabs", 1, "tabRenderer", "content"]
 SINGLE_COLUMN = ["contents", "singleColumnBrowseResultsRenderer"]
@@ -43,7 +45,10 @@
 TEXT_RUN_TEXT = TEXT_RUN + ["text"]
 SUBTITLE = ["subtitle"] + RUN_TEXT
 SUBTITLE_RUNS = ["subtitle", "runs"]
-SUBTITLE2 = SUBTITLE_RUNS + [2, "text"]
+LAST_RUN = ["runs", -1]
+TEXT_LAST_RUN = ["text"] + LAST_RUN
+LAST_SUB_RUN = ["subtitle"] + LAST_RUN
+SUBTITLE2 = SUBTITLE_RUNS + TTEXT
 SUBTITLE3 = SUBTITLE_RUNS + [4, "text"]
 THUMBNAIL = ["thumbnail", "thumbnails"]
 THUMBNAILS = ["thumbnail", "musicThumbnailRenderer"] + THUMBNAIL

diff --git a/ytmusicapi/parsers/_utils.py b/ytmusicapi/parsers/_utils.py
@@ -67,6 +67,18 @@ def get_dot_separator_index(runs):
     return index
 
 
+def parse_real_count(run):
+    """Pull an int from a views/plays/subs run, e.g. "1.2M views" -> 1200000; -1 if unparseable."""
+    text = run.get("text") if isinstance(run, dict) else None
+    count = (text or "").split(" ")[0].replace(",", "")
+    scale = {"K": 1_000, "M": 1_000_000, "B": 1_000_000_000}.get(count[-1:], 1)
+    try:
+        # round(), not int(): 2.01 * 1000 == 2009.9999999999998 would truncate to 2009
+        return round(float(count.rstrip("KMB")) * scale)
+    except (ValueError, OverflowError):  # non-numeric text such as "No views", or ""
+        return -1
+
+
 def parse_duration(duration):
     if duration is None:
         return duration

diff --git a/ytmusicapi/parsers/browsing.py b/ytmusicapi/parsers/browsing.py
@@ -52,31 +52,30 @@ def parse_content_list(results, parse_func, key=MTRIR):
 
 
 def parse_album(result):
-    return {
+    """Parse an album or single result; the subtitle runs decide whether "artists" or "year" is set."""
+    album = {
         "title": nav(result, TITLE_TEXT),
-        "type": nav(result, SUBTITLE),
-        "artists": [parse_id_name(x) for x in nav(result, ["subtitle", "runs"]) if "navigationEndpoint" in x],
         "browseId": nav(result, TITLE + NAVIGATION_BROWSE_ID),
         "audioPlaylistId": nav(result, THUMBNAIL_OVERLAY, True),
         "thumbnails": nav(result, THUMBNAIL_RENDERER),
         "isExplicit": nav(result, SUBTITLE_BADGE_LABEL, True) is not None,
     }
 
+    runs = nav(result, SUBTITLE_RUNS)
+    if len(runs) >= 2:
+        album["type"] = nav(runs, ZTEXT, True)
 
-def parse_id_name(sub_run):
-    return {
-        "id": nav(sub_run, NAVIGATION_BROWSE_ID, True),
-        "name": nav(sub_run, ["text"], True),
-    }
+        # runs[2] only exists with 3+ runs; a navigationEndpoint there means the runs list artists
+        if len(runs) > 2 and "navigationEndpoint" in runs[2]:
+            album["artists"] = artists_from_runs(runs)
+        else:
+            album["year"] = nav(runs, TTEXT, True)
 
+    else:  # a lone run: it's a single with just the year
+        album["type"] = "Single"
+        album["year"] = nav(runs, ZTEXT, True)
 
-def parse_single(result):
-    return {
-        "title": nav(result, TITLE_TEXT),
-        "year": nav(result, SUBTITLE, True),
-        "browseId": nav(result, TITLE + NAVIGATION_BROWSE_ID),
-        "thumbnails": nav(result, THUMBNAIL_RENDERER),
-    }
+    return album
 
 
 def parse_song(result):
@@ -95,15 +94,16 @@ def parse_song_flat(data):
     song = {
         "title": nav(columns[0], TEXT_RUN_TEXT),
         "videoId": nav(columns[0], TEXT_RUN + NAVIGATION_VIDEO_ID, True),
-        "artists": parse_song_artists(data, 1),
+        "artists": parse_pl_song_artists(data, 1),
         "thumbnails": nav(data, THUMBNAILS),
         "isExplicit": nav(data, BADGE_LABEL, True) is not None,
     }
-    if len(columns) > 2 and columns[2] is not None and "navigationEndpoint" in nav(columns[2], TEXT_RUN):
-        song["album"] = {
-            "name": nav(columns[2], TEXT_RUN_TEXT),
-            "id": nav(columns[2], TEXT_RUN + NAVIGATION_BROWSE_ID),
-        }
+    if (
+        len(columns) > 2
+        and columns[2] is not None
+        and "navigationEndpoint" in (targ := nav(columns[2], TEXT_RUN))
+    ):
+        song["album"] = parse_id_name(targ)
     else:
         song["views"] = nav(columns[1], ["text", "runs", -1, "text"]).split(" ")[0]
 
@@ -112,34 +112,69 @@ def parse_song_flat(data):
 
 def parse_video(result):
+    """Parse a video or episode result: title, ids, thumbnails, type, views, view_count, artists."""
     runs = nav(result, SUBTITLE_RUNS)
-    artists_len = get_dot_separator_index(runs)
     videoId = nav(result, NAVIGATION_VIDEO_ID, True)
     if not videoId:
+        # no direct video id: fall back to the first menu entry that carries a queue video id
         videoId = next(
-            id for entry in nav(result, MENU_ITEMS) if nav(entry, MENU_SERVICE + QUEUE_VIDEO_ID, True)
-        )
-    return {
+            (
+                found
+                for entry in nav(result, MENU_ITEMS)
+                if (found := nav(entry, MENU_SERVICE + QUEUE_VIDEO_ID, True))
+            ),
+            None,
+        )  # None default instead of StopIteration when nothing matches (e.g. episodes)
+    result = {
         "title": nav(result, TITLE_TEXT),
         "videoId": videoId,
-        "artists": parse_song_artists_runs(runs[:artists_len]),
         "playlistId": nav(result, NAVIGATION_PLAYLIST_ID, True),
         "thumbnails": nav(result, THUMBNAIL_RENDERER, True),
-        "views": runs[-1]["text"].split(" ")[0],
     }
 
+    # an episode puts its duration ("%m min %s sec") in the first run instead of an artist
+    # unsure if we should capture the duration for edge cases
+    # NOTE(review): an unlinked artist whose name contains "min" or "sec" is also matched here
+    if "navigationEndpoint" not in runs[0] and any(x in runs[0]["text"] for x in ["sec", "min"]):
+        result["type"] = "episode"
+        # views are unavailable on episodes
+        result["views"] = None
+        result["view_count"] = -1
+        result["artists"] = artists_from_runs(runs[2:], 0)
+    else:
+        result["type"] = "song"
+        result["views"] = runs[-1]["text"].split(" ")[0]
+        result["view_count"] = parse_real_count(runs[-1]) if len(runs) > 2 else -1
+        result["artists"] = artists_from_runs(runs[:-2], 0)
+
+    return result
+
 
 def parse_playlist(data):
+    """Parse a playlist result; author, counts and featured artists come from the subtitle runs."""
     playlist = {
         "title": nav(data, TITLE_TEXT),
         "playlistId": nav(data, TITLE + NAVIGATION_BROWSE_ID)[2:],
         "thumbnails": nav(data, THUMBNAIL_RENDERER),
     }
-    subtitle = data["subtitle"]
-    if "runs" in subtitle:
-        playlist["description"] = "".join([run["text"] for run in subtitle["runs"]])
-        if len(subtitle["runs"]) == 3 and re.search(r"\d+ ", nav(data, SUBTITLE2)):
-            playlist["count"] = nav(data, SUBTITLE2).split(" ")[0]
-            playlist["author"] = parse_song_artists_runs(subtitle["runs"][:1])
+    runs = nav(data, SUBTITLE_RUNS, True)  # subtitle may have no "runs" (the old code checked for it)
+    if runs:
+        playlist["description"] = "".join([run["text"] for run in runs])
+        if len(runs) == 3 and runs[1]["text"] == " • ":
+            # genre charts from get_charts('US') are sent here...
+            if runs[0]["text"] == "Chart" or runs[-1]["text"] == "YouTube Music":
+                playlist["count"] = None
+                playlist["view_count"] = -1
+                playlist["author"] = {"name": "YouTube Music", "id": None}
+                playlist["featured_artists"] = None
+            else:
+                playlist["count"] = nav(data, SUBTITLE2).split(" ")[0]  # this is "views" everywhere else
+                playlist["view_count"] = parse_real_count(runs[2])
+                playlist["author"] = parse_id_name(runs[0])
+                playlist["featured_artists"] = None
+        else:
+            playlist["featured_artists"] = nav(runs, ZTEXT, True)
+            playlist["author"] = {"name": "YouTube Music", "id": None}  # defaults keep the return format
+            playlist["view_count"] = -1
 
     return playlist
 
@@ -152,6 +187,7 @@ def parse_related_artist(data):
         "title": nav(data, TITLE_TEXT),
         "browseId": nav(data, TITLE + NAVIGATION_BROWSE_ID),
         "subscribers": subscribers,
+        "sub_count": parse_real_count(nav(data, LAST_SUB_RUN, True)),
         "thumbnails": nav(data, THUMBNAIL_RENDERER),
     }
 

diff --git a/ytmusicapi/parsers/explore.py b/ytmusicapi/parsers/explore.py
@@ -26,7 +26,7 @@ def parse_chart_artist(data):
 
 def parse_chart_trending(data):
     flex_0 = get_flex_column_item(data, 0)
-    artists = parse_song_artists(data, 1)
+    artists = parse_pl_song_artists(data, 1)
     index = get_dot_separator_index(artists)
     # last item is views for some reason
     views = None if index == len(artists) else artists.pop()["name"].split(" ")[0]

diff --git a/ytmusicapi/parsers/i18n.py b/ytmusicapi/parsers/i18n.py
@@ -7,7 +7,6 @@
     parse_content_list,
     parse_playlist,
     parse_related_artist,
-    parse_single,
     parse_video,
 )
 
@@ -33,7 +32,7 @@ def get_search_result_types(self):
     def parse_artist_contents(self, results: List) -> Dict:
         categories = ["albums", "singles", "videos", "playlists", "related"]
         categories_local = [_("albums"), _("singles"), _("videos"), _("playlists"), _("related")]  # type: ignore[name-defined]
-        categories_parser = [parse_album, parse_single, parse_video, parse_playlist, parse_related_artist]
+        categories_parser = [parse_album, parse_album, parse_video, parse_playlist, parse_related_artist]
         artist: Dict[str, Any] = {}
         for i, category in enumerate(categories):
             data = [

diff --git a/ytmusicapi/parsers/playlists.py b/ytmusicapi/parsers/playlists.py
@@ -3,7 +3,7 @@
 from .songs import *
 
 
-def parse_playlist_items(results, menu_entries: Optional[List[List]] = None, is_album=False):
+def parse_playlist_items(results, menu_entries: Optional[List[List]] = None, by_artists=None):
     songs = []
     for result in results:
         if MRLIR not in result:
@@ -44,7 +44,9 @@ def parse_playlist_items(results, menu_entries: Optional[List[List]] = None, is_
         if title == "Song deleted":
             continue
 
-        artists = parse_song_artists(data, 1)
+        # when parsing album, artists are passed in
+        # to assist polyfill on unlinked artists
+        artists = parse_pl_song_artists(data, 1, fill_artists=by_artists)
 
         album = parse_song_album(data, 2)
 
@@ -93,7 +95,7 @@ def parse_playlist_items(results, menu_entries: Optional[List[List]] = None, is_
             "views": views,
         }
 
-        if is_album:
+        if by_artists:
             song["track_number"] = int(nav(data, ["index", "runs", 0, "text"])) if isAvailable else None
 
         if duration: