From 44cc59d0bd09c198cec8ca23ed2e84cdd6c5c942 Mon Sep 17 00:00:00 2001 From: Michael Bridak Date: Sat, 3 Jun 2023 23:02:07 -0700 Subject: [PATCH 1/3] Update bigcty.py Extract the download link. --- ctyparser/bigcty.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ctyparser/bigcty.py b/ctyparser/bigcty.py index 88aa972..7502bc6 100644 --- a/ctyparser/bigcty.py +++ b/ctyparser/bigcty.py @@ -21,6 +21,7 @@ import feedparser from typing import Union +from lxml import html default_feed = "http://www.country-files.com/category/big-cty/feed/" @@ -172,7 +173,9 @@ def update(self) -> bool: with tempfile.TemporaryDirectory() as temp: path = pathlib.PurePath(temp) - dl_url = f'http://www.country-files.com/bigcty/download/{update_date[:4]}/bigcty-{update_date}.zip' # TODO: Issue #10 + page = session.get(update_url) + tree = html.fromstring(page.content) + dl_url = tree.xpath("//a[contains(@href,'zip')]/@href")[0] rq = session.get(dl_url) if rq.status_code == 404: dl_url = f'http://www.country-files.com/bigcty/download/bigcty-{update_date}.zip' From d3e47153355e2bc5acaf0b416cd042cc00590d89 Mon Sep 17 00:00:00 2001 From: Michael Bridak Date: Sat, 3 Jun 2023 23:06:46 -0700 Subject: [PATCH 2/3] Update devrequirements.txt Add dependency --- devrequirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/devrequirements.txt b/devrequirements.txt index 4df7d58..91c6baf 100644 --- a/devrequirements.txt +++ b/devrequirements.txt @@ -7,3 +7,4 @@ sphinx # Dependencies feedparser requests +lxml From 3badac29690219b9d81660be67f36cd033e2e899 Mon Sep 17 00:00:00 2001 From: mbridak Date: Sun, 4 Jun 2023 09:20:03 -0700 Subject: [PATCH 3/3] pushed changes --- ctyparser/bigcty.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/ctyparser/bigcty.py b/ctyparser/bigcty.py index 7502bc6..5de22eb 100644 --- a/ctyparser/bigcty.py +++ b/ctyparser/bigcty.py @@ -174,14 +174,16 @@ def update(self) -> bool: with tempfile.TemporaryDirectory() as 
temp: path = pathlib.PurePath(temp) page = session.get(update_url) + if page.status_code != 200: + raise Exception(f"Unable to find and download bigcty-{update_date}.zip") tree = html.fromstring(page.content) - dl_url = tree.xpath("//a[contains(@href,'zip')]/@href")[0] + link_urls = tree.xpath("//a[contains(@href,'zip')]/@href") + if len(link_urls) == 0: + raise Exception(f"Unable to find link to bigcty-{update_date}.zip") + dl_url = link_urls[0] rq = session.get(dl_url) - if rq.status_code == 404: - dl_url = f'http://www.country-files.com/bigcty/download/bigcty-{update_date}.zip' - rq = session.get(dl_url) - if rq.status_code != 200: - raise Exception(f"Unable to find and download bigcty-{update_date}.zip") + if rq.status_code != 200: + raise Exception(f"Unable to find and download bigcty-{update_date}.zip") with open(path / 'cty.zip', 'wb+') as file: file.write(rq.content) zipfile.ZipFile(file).extract('cty.dat', path=str(path)) # Force cast as str because mypy