From 39d7bc8dbc1a9ae6951459d11efe42609f5cc0dd Mon Sep 17 00:00:00 2001 From: Arcadiy Ivanov Date: Mon, 24 Jun 2024 19:07:04 -0400 Subject: [PATCH] Add ability to delete all versions except the most recent one The most recent version is the one containing the most recently created files. Fix the file name parsing logic Update README and documentation fixes #33 --- README.md | 57 ++++++++++++++++++++-- src/main/python/pypi_cleanup/__init__.py | 62 +++++++++++++++++++----- 2 files changed, 102 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 28fb6f7..d716873 100644 --- a/README.md +++ b/README.md @@ -34,15 +34,15 @@ Authentication password may be passed via environment variable Authentication with TOTP is supported. -Examples: +### Examples: ```bash $ pypi-cleanup --help -usage: pypi-cleanup [-h] -u USERNAME -p PACKAGE [-t URL] [-r PATTERNS] [--do-it] [-y] [-v] +usage: pypi-cleanup [-h] [-u USERNAME] -p PACKAGE [-t URL] [-r PATTERNS | --leave-most-recent-only] [--query-only] [--do-it] [-y] [-d DAYS] [-v] -PyPi Package Cleanup Utility +PyPi Package Cleanup Utility v0.1.7.dev20240624230606 -optional arguments: +options: -h, --help show this help message and exit -u USERNAME, --username USERNAME authentication username (default: None) @@ -51,11 +51,16 @@ optional arguments: -t URL, --host URL PyPI :// prefix (default: https://pypi.org/) -r PATTERNS, --version-regex PATTERNS regex to use to match package versions to be deleted (default: None) + --leave-most-recent-only + delete all releases except the *most recent* one, i.e. the one containing the most recently created files (default: False) + --query-only only queries and processes the package, no login required (default: False) --do-it actually perform the destructive delete (default: False) -y, --yes confirm extremely dangerous destructive delete (default: False) + -d DAYS, --days DAYS only delete releases **matching specified patterns** where all files are older than X days (default: 0) -v, --verbose be verbose (default: 0) ``` +#### Regular Cleanup of Development Artifacts ```bash $ pypi-cleanup -u arcivanov -p pybuilder Password: @@ -66,11 +71,53 @@ INFO:root:Deleting pybuilder version 0.12.3.dev20200421010857 INFO:root:Deleted pybuilder version 0.12.3.dev20200421010857 ``` +#### Using Custom Regex Pattern ```bash -$ pypi-cleanup -u arcivanov -p geventmp -n -r '.*\\.dev1$' +$ pypi-cleanup -u arcivanov -p geventmp -r '.*\\.dev1$' +WARNING:root: +WARNING: + You're using custom patterns: [re.compile('.*\\\\.dev1$')]. + If you make a mistake in your patterns you can potentially wipe critical versions irrecoverably. + Make sure to test your patterns before running the destructive cleanup. + Once you're satisfied the patterns are correct re-run with `-y`/`--yes` to confirm you know what you're doing. + Goodbye. +$ pypi-cleanup -u arcivanov -p geventmp -r '.*\\.dev1$' -y Password: WARNING:root:RUNNING IN DRY-RUN MODE INFO:root:Will use the following patterns [re.compile('.*\\.dev1$')] on package geventmp Authentication code: 123456 INFO:root:Deleting geventmp version 0.0.1.dev1 ``` + +#### Deleting All Versions Except The Most Recent One + +```bash +$ pypi-cleanup -p pypi-cleanup --leave-most-recent-only +WARNING:root: +WARNING: + You're trying to delete ALL versions of the package EXCEPT for the *most recent one*, i.e. + the one with the most recent (by the wall clock) files, disregarding the actual version numbers + or versioning schemes! + + You can potentially wipe critical versions irrecoverably. + Make sure this is what you really want before running the destructive cleanup. + Once you're sure you want to delete all versions except the most recent one, + re-run with `-y`/`--yes` to confirm you know what you're doing. + Goodbye. +$ pypi-cleanup -p pypi-cleanup --leave-most-recent-only -y --query-only +INFO:root:Running in DRY RUN mode +INFO:root:Will only leave the MOST RECENT version of the package 'pypi-cleanup' +INFO:root:Leaving the MOST RECENT package version: 0.1.7.dev20240624221535 - 2024-06-24T22:15:52.778775+0000 +INFO:root:Found the following releases to delete: +INFO:root: 0.0.1 +INFO:root: 0.0.2 +INFO:root: 0.0.3 +INFO:root: 0.1.0 +INFO:root: 0.1.1 +INFO:root: 0.1.2 +INFO:root: 0.1.3 +INFO:root: 0.1.4 +INFO:root: 0.1.5 +INFO:root: 0.1.6 +INFO:root:Query-only mode - exiting +``` diff --git a/src/main/python/pypi_cleanup/__init__.py b/src/main/python/pypi_cleanup/__init__.py index 079e4fb..d49727b 100644 --- a/src/main/python/pypi_cleanup/__init__.py +++ b/src/main/python/pypi_cleanup/__init__.py @@ -74,7 +74,7 @@ def handle_endtag(self, tag): class PypiCleanup: - def __init__(self, url, username, package, do_it, patterns, verbose, days, query_only, **_): + def __init__(self, url, username, package, do_it, patterns, verbose, days, query_only, leave_most_recent_only, **_): self.url = urlparse(url).geturl() if self.url[-1] == "/": self.url = self.url[:-1] @@ -84,6 +84,7 @@ def __init__(self, url, username, package, do_it, patterns, verbose, days, query self.patterns = patterns or DEFAULT_PATTERNS self.verbose = verbose self.query_only = query_only + self.leave_most_recent_only = leave_most_recent_only self.date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=days) def run(self): @@ -97,7 +98,10 @@ def run(self): else: logging.info("Running in DRY RUN mode") - logging.info(f"Will use the following patterns {self.patterns} on package {self.package!r}") + if not self.leave_most_recent_only: + logging.info(f"Will use the following patterns {self.patterns} on package {self.package!r}") + else: + logging.info(f"Will only leave the MOST RECENT version of the package {self.package!r}") with requests.Session() as s: s.headers.update({"User-Agent": f"pypi-cleanup/{__version__} (requests/{requests_version})"}) @@ -112,21 +116,35 @@ def run(self): project_info = r.json() releases_by_date = {} + + def package_matches_file(p, v, f): + filename = f["filename"].lower() + if filename.endswith(".whl") or filename.endswith(".egg") or filename.endswith(".src.rpm"): + return filename.startswith(f"{p.replace('-', '_')}-{v}-") + + return filename in (f"{p}-{v}.tar.gz", f"{p}-{v}.zip") + for version in project_info["versions"]: releases_by_date[version] = max( - [datetime.datetime.strptime(f["upload-time"], '%Y-%m-%dT%H:%M:%S.%f%z') + [datetime.datetime.strptime(f["upload-time"], "%Y-%m-%dT%H:%M:%S.%f%z") for f in project_info["files"] - if f["filename"].lower().startswith(f"{self.package}-{version}") or - f["filename"].lower().startswith(f"{self.package.replace('-', '_')}-{version}")]) + if package_matches_file(self.package, version, f)]) if not releases_by_date: logging.info(f"No releases for package {self.package!r} have been found") return - pkg_vers = list(filter(lambda k: - any(filter(lambda rex: rex.match(k), - self.patterns)) and releases_by_date[k] < self.date, - releases_by_date.keys())) + if self.leave_most_recent_only: + leave_release = max(releases_by_date, key=releases_by_date.get) + logging.info( + f"Leaving the MOST RECENT package version: {leave_release} - " + f"{releases_by_date[leave_release].strftime('%Y-%m-%dT%H:%M:%S.%f%z')}") + pkg_vers = list(r for r in releases_by_date if r != leave_release) + else: + pkg_vers = list(filter(lambda k: + any(filter(lambda rex: rex.match(k), + self.patterns)) and releases_by_date[k] < self.date, + releases_by_date.keys())) if not pkg_vers: logging.info(f"No releases were found matching specified patterns " @@ -262,8 +280,12 @@ def main(): parser.add_argument("-p", "--package", required=True, help="PyPI package name") parser.add_argument("-t", "--host", default="https://pypi.org/", dest="url", help="PyPI :// prefix") - parser.add_argument("-r", "--version-regex", type=re.compile, action="append", - dest="patterns", help="regex to use to match package versions to be deleted") + g = parser.add_mutually_exclusive_group() + g.add_argument("-r", "--version-regex", type=re.compile, action="append", + dest="patterns", help="regex to use to match package versions to be deleted") + g.add_argument("--leave-most-recent-only", action="store_true", default=False, + help="delete all releases except the *most recent* one, i.e. the one containing " + "the most recently created files") parser.add_argument("--query-only", action="store_true", default=False, help="only queries and processes the package, no login required") parser.add_argument("--do-it", action="store_true", default=False, @@ -271,7 +293,8 @@ def main(): parser.add_argument("-y", "--yes", action="store_true", default=False, dest="confirm", help="confirm extremely dangerous destructive delete") parser.add_argument("-d", "--days", type=int, default=0, - help="only delete releases where all files are older than X days") + help="only delete releases **matching specified patterns** where all files are " + "older than X days") parser.add_argument("-v", "--verbose", action="store_const", const=1, default=0, help="be verbose") args = parser.parse_args() @@ -286,6 +309,21 @@ def main(): \t""")) return 3 + if args.leave_most_recent_only and not args.confirm and not args.do_it: + logging.warning(dedent(""" + WARNING: + \tYou're trying to delete ALL versions of the package EXCEPT for the *most recent one*, i.e. + \tthe one with the most recent (by the wall clock) files, disregarding the actual version numbers + \tor versioning schemes! + \t + \tYou can potentially wipe critical versions irrecoverably. + \tMake sure this is what you really want before running the destructive cleanup. + \tOnce you're sure you want to delete all versions except the most recent one, + \tre-run with `-y`/`--yes` to confirm you know what you're doing. + \tGoodbye. + \t""")) + return 3 + return PypiCleanup(**vars(args)).run() finally: logging.shutdown()