From f9c3c764740917a0f05fdc7062f72bea71c51c01 Mon Sep 17 00:00:00 2001 From: Alex Laird Date: Fri, 1 Nov 2024 13:21:04 -0500 Subject: [PATCH] Further support for Amazon's new `data-component` tag on order price, seller, and return eligibility. Better date parsing. --- CHANGELOG.md | 11 +++++++++++ amazonorders/__init__.py | 2 +- amazonorders/constants.py | 6 ++++++ amazonorders/entity/item.py | 11 ++++++----- amazonorders/entity/order.py | 12 ++++++------ amazonorders/entity/parsable.py | 24 ++++++++++++++++++++++++ amazonorders/selectors.py | 8 ++++++-- amazonorders/util.py | 3 +++ 8 files changed, 63 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a2ab14c..e59a6c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,17 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## [Unreleased](https://github.com/alexdlaird/amazon-orders/compare/2.0.2...HEAD) +## [2.0.3](https://github.com/alexdlaird/amazon-orders/compare/2.0.2...2.0.3) - 2024-11-01 + +### Added + +- Further support for Amazon's new `data-component` tag on order price, seller, and return eligibility, and fixing an issue with `Shipment` parsing. +- [`Parsable.to_date()`](https://amazon-orders.readthedocs.io/api.html#amazonorders.entity.parsable.Parsable.to_date) attempts multiple date formats. + +### Fixed + +- An issue with `Shipment`s parsing with Amazon's new `data-component`. + ## [2.0.2](https://github.com/alexdlaird/amazon-orders/compare/2.0.1...2.0.2) - 2024-10-30 ### Added diff --git a/amazonorders/__init__.py b/amazonorders/__init__.py index 07e8768..d486db3 100644 --- a/amazonorders/__init__.py +++ b/amazonorders/__init__.py @@ -1,3 +1,3 @@ __copyright__ = "Copyright (c) 2024 Alex Laird" __license__ = "MIT" -__version__ = "2.0.2" +__version__ = "2.0.3" diff --git a/amazonorders/constants.py b/amazonorders/constants.py index 405b37c..66ca577 100644 --- a/amazonorders/constants.py +++ b/amazonorders/constants.py @@ -53,3 +53,9 @@ class Constants: "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) " "Chrome/120.0.0.0 Safari/537.36", } + + ########################################################################## + # Formats + ########################################################################## + + VALID_DATE_FORMATS = ["%b %d, %Y", "%B %d, %Y"] diff --git a/amazonorders/entity/item.py b/amazonorders/entity/item.py index d6651a6..52ffe9d 100644 --- a/amazonorders/entity/item.py +++ b/amazonorders/entity/item.py @@ -2,7 +2,7 @@ __license__ = "MIT" import logging -from datetime import date, datetime +from datetime import date from typing import Optional, TypeVar from bs4 import Tag @@ -35,11 +35,11 @@ def __init__(self, link=True, required=True) #: The Item price. self.price: Optional[float] = self.to_currency( - self.safe_simple_parse(selector=self.config.selectors.FIELD_ITEM_TAG_ITERATOR_SELECTOR, + self.safe_simple_parse(selector=self.config.selectors.FIELD_ITEM_PRICE_SELECTOR, prefix_split="$")) #: The Item Seller. self.seller: Optional[Seller] = self.safe_simple_parse( - selector=self.config.selectors.FIELD_ITEM_TAG_ITERATOR_SELECTOR, + selector=self.config.selectors.FIELD_ITEM_SELLER_SELECTOR, text_contains="Sold by:", wrap_tag=Seller) #: The Item condition. @@ -69,7 +69,7 @@ def __lt__(self, def _parse_return_eligible_date(self) -> Optional[date]: value = None - for tag in util.select(self.parsed, self.config.selectors.FIELD_ITEM_TAG_ITERATOR_SELECTOR): + for tag in util.select(self.parsed, self.config.selectors.FIELD_ITEM_RETURN_SELECTOR): if "Return" in tag.text: tag_str = tag.text.strip() split_str = "through " @@ -77,6 +77,7 @@ def _parse_return_eligible_date(self) -> Optional[date]: split_str = "closed on " if split_str in tag_str: date_str = tag_str.split(split_str)[1] - value = datetime.strptime(date_str, "%b %d, %Y").date() + value = self.to_date(date_str) + break return value diff --git a/amazonorders/entity/order.py b/amazonorders/entity/order.py index 0e6443a..d009fd9 100644 --- a/amazonorders/entity/order.py +++ b/amazonorders/entity/order.py @@ -3,7 +3,7 @@ import json import logging -from datetime import date, datetime +from datetime import date from typing import Any, List, Optional, TypeVar, Union from bs4 import BeautifulSoup, Tag @@ -137,8 +137,8 @@ def _parse_order_placed_date(self) -> date: else: split_str = "Order placed" - value = value.split(split_str)[1].strip() - value = datetime.strptime(value, "%B %d, %Y").date() + date_str = value.split(split_str)[1].strip() + value = self.to_date(date_str) return value @@ -149,7 +149,7 @@ def _parse_recipient(self) -> Optional[Recipient]: value = util.select_one(self.parsed, self.config.selectors.FIELD_ORDER_ADDRESS_FALLBACK_1_SELECTOR) if value: - data_popover = value.get("data-a-popover", {}) # type: ignore[arg-type] + data_popover = value.get("data-a-popover", {}) # type: ignore[arg-type, var-annotated] inline_content = data_popover.get("inlineContent") # type: ignore[union-attr] if inline_content: value = BeautifulSoup(json.loads(inline_content), "html.parser") @@ -272,7 +272,7 @@ def _parse_order_shipping_date(self) -> Optional[date]: if value: date_str = value.split("-")[0].strip() - value = datetime.strptime(date_str, "%B %d, %Y").date() + value = self.to_date(date_str) return value @@ -282,7 +282,7 @@ def _parse_refund_completed_date(self) -> Optional[date]: if value: date_str = value.split("-")[0].strip() - value = datetime.strptime(date_str, "%B %d, %Y").date() + value = self.to_date(date_str) return value diff --git a/amazonorders/entity/parsable.py b/amazonorders/entity/parsable.py index 4ccc396..db51f04 100644 --- a/amazonorders/entity/parsable.py +++ b/amazonorders/entity/parsable.py @@ -2,6 +2,7 @@ __license__ = "MIT" import logging +from datetime import date, datetime from typing import Any, Callable, Optional, Type, Union, Dict from bs4 import Tag @@ -169,3 +170,26 @@ def to_currency(self, return None return currency + + def to_date(self, + date_str: str) -> Optional[date]: + """ + Return the given date string as a date object. + + :param date_str: The date string to parse to a date object. + :return: The parsed date. + """ + value = None + + for fmt in self.config.constants.VALID_DATE_FORMATS: + try: + value = datetime.strptime(date_str, fmt).date() + except ValueError: + pass + + if value is None: + logger.warning( + f"ValueError: time data '{date_str}' does not match any format in " + f"{self.config.constants.VALID_DATE_FORMATS}") + + return value diff --git a/amazonorders/selectors.py b/amazonorders/selectors.py index a87114a..5a9031f 100644 --- a/amazonorders/selectors.py +++ b/amazonorders/selectors.py @@ -38,7 +38,8 @@ class Selectors: ORDER_HISTORY_ENTITY_SELECTOR = ["div.order", "div.order-card"] ORDER_DETAILS_ENTITY_SELECTOR = ["div#orderDetails", "div#ordersContainer", "[data-component='orderCard']"] ITEM_ENTITY_SELECTOR = ["div:has(> div.yohtmlc-item)", ".item-box", "[data-component='purchasedItems']"] - SHIPMENT_ENTITY_SELECTOR = ["div.shipment", "div.delivery-box", "[data-component='shipments']"] + SHIPMENT_ENTITY_SELECTOR = ["div.shipment", "div.delivery-box", + "[data-component='orderCard'] [data-component='shipments']"] ##################################### # CSS selectors for Item fields @@ -48,7 +49,10 @@ class Selectors: FIELD_ITEM_QUANTITY_SELECTOR = ["span.item-view-qty", "span.product-image__qty", "[data-component='itemQuantity']"] FIELD_ITEM_TITLE_SELECTOR = [".yohtmlc-item a", ".yohtmlc-product-title", "[data-component='itemTitle']"] FIELD_ITEM_LINK_SELECTOR = [".yohtmlc-item a", "a:has(> .yohtmlc-product-title)", "[data-component='itemTitle'] a"] - FIELD_ITEM_TAG_ITERATOR_SELECTOR = [".yohtmlc-item div", "[data-component='purchasedItemsRightGrid']"] + FIELD_ITEM_TAG_ITERATOR_SELECTOR = [".yohtmlc-item div"] + FIELD_ITEM_PRICE_SELECTOR = ["[data-component='unitPrice']"] + FIELD_ITEM_TAG_ITERATOR_SELECTOR + FIELD_ITEM_SELLER_SELECTOR = ["[data-component='orderedMerchantName']"] + FIELD_ITEM_TAG_ITERATOR_SELECTOR + FIELD_ITEM_RETURN_SELECTOR = ["[data-component='itemReturnEligibility']"] + FIELD_ITEM_TAG_ITERATOR_SELECTOR ##################################### # CSS selectors for Order fields diff --git a/amazonorders/util.py b/amazonorders/util.py index 3cbe42c..739d655 100644 --- a/amazonorders/util.py +++ b/amazonorders/util.py @@ -2,10 +2,13 @@ __license__ = "MIT" import importlib +import logging from typing import List, Union, Optional, Callable from bs4 import Tag +logger = logging.getLogger(__name__) + def select(parsed: Tag, selector: Union[List[str], str]) -> List[Tag]: """