From 409bb81a69f3ea1b354fa209a5b6b9d54ea06419 Mon Sep 17 00:00:00 2001
From: "Reilly.tang"
Date: Mon, 27 Nov 2023 18:37:38 +0800
Subject: [PATCH] [fix #5044] Support converting 'yyyymmdd' format to date (#5078)

Signed-off-by: tangruilin
---
 arrow-cast/src/cast.rs  | 24 ++++++++++++++++++++++++
 arrow-cast/src/parse.rs | 14 +++++++++++++-
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index 22faedb96f96..3d9d0ee3d920 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -4879,6 +4879,30 @@ mod tests {
         }
     }
 
+    #[test]
+    fn test_cast_string_format_yyyymmdd_to_date32() {
+        let a = Arc::new(StringArray::from(vec![
+            Some("2020-12-25"),
+            Some("20201117"),
+        ])) as ArrayRef;
+
+        let to_type = DataType::Date32;
+        let options = CastOptions {
+            safe: false,
+            format_options: FormatOptions::default(),
+        };
+        let result = cast_with_options(&a, &to_type, &options).unwrap();
+        let c = result.as_primitive::<Date32Type>();
+        assert_eq!(
+            chrono::NaiveDate::from_ymd_opt(2020, 12, 25),
+            c.value_as_date(0)
+        );
+        assert_eq!(
+            chrono::NaiveDate::from_ymd_opt(2020, 11, 17),
+            c.value_as_date(1)
+        );
+    }
+
     #[test]
     fn test_cast_string_to_time32second() {
         let a1 = Arc::new(StringArray::from(vec![
diff --git a/arrow-cast/src/parse.rs b/arrow-cast/src/parse.rs
index f01b2b4c0d63..750f38006d33 100644
--- a/arrow-cast/src/parse.rs
+++ b/arrow-cast/src/parse.rs
@@ -559,8 +559,20 @@ fn parse_date(string: &str) -> Option<NaiveDate> {
 
     const HYPHEN: u8 = b'-'.wrapping_sub(b'0');
 
+    // refer to https://www.rfc-editor.org/rfc/rfc3339#section-3
     if digits[4] != HYPHEN {
-        return None;
+        let (year, month, day) = match (mask, string.len()) {
+            (0b11111111, 8) => (
+                digits[0] as u16 * 1000
+                    + digits[1] as u16 * 100
+                    + digits[2] as u16 * 10
+                    + digits[3] as u16,
+                digits[4] * 10 + digits[5],
+                digits[6] * 10 + digits[7],
+            ),
+            _ => return None,
+        };
+        return NaiveDate::from_ymd_opt(year as _, month as _, day as _);
     }
 
     let (month, day) = match mask {