diff --git a/Cargo.lock b/Cargo.lock index daf9e166c989..23670a787704 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2302,6 +2302,7 @@ dependencies = [ "datafusion-physical-expr-common", "hashbrown 0.16.1", "itertools 0.14.0", + "itoa", "log", "paste", "rand 0.9.2", diff --git a/Cargo.toml b/Cargo.toml index eb3c311c1a75..3bcf17d8ed65 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -161,6 +161,7 @@ hex = { version = "0.4.3" } indexmap = "2.13.0" insta = { version = "1.46.3", features = ["glob", "filters"] } itertools = "0.14" +itoa = "1.0" liblzma = { version = "0.4.6", features = ["static"] } log = "^0.4" memchr = "2.8.0" diff --git a/datafusion/functions-nested/Cargo.toml b/datafusion/functions-nested/Cargo.toml index ce0ba70c7867..0b26170dbb74 100644 --- a/datafusion/functions-nested/Cargo.toml +++ b/datafusion/functions-nested/Cargo.toml @@ -59,6 +59,7 @@ datafusion-macros = { workspace = true } datafusion-physical-expr-common = { workspace = true } hashbrown = { workspace = true } itertools = { workspace = true, features = ["use_std"] } +itoa = { workspace = true } log = { workspace = true } paste = { workspace = true } @@ -98,6 +99,10 @@ name = "array_repeat" harness = false name = "array_set_ops" +[[bench]] +harness = false +name = "array_to_string" + [[bench]] harness = false name = "array_position" diff --git a/datafusion/functions-nested/benches/array_to_string.rs b/datafusion/functions-nested/benches/array_to_string.rs new file mode 100644 index 000000000000..286ed4eeb000 --- /dev/null +++ b/datafusion/functions-nested/benches/array_to_string.rs @@ -0,0 +1,188 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::array::{ArrayRef, Float64Array, Int64Array, ListArray, StringArray}; +use arrow::buffer::OffsetBuffer; +use arrow::datatypes::{DataType, Field}; +use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; +use datafusion_common::ScalarValue; +use datafusion_common::config::ConfigOptions; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl}; +use datafusion_functions_nested::string::ArrayToString; +use rand::rngs::StdRng; +use rand::{Rng, SeedableRng}; +use std::hint::black_box; +use std::sync::Arc; + +const NUM_ROWS: usize = 1000; +const ARRAY_SIZES: &[usize] = &[5, 20, 100]; +const NESTED_ARRAY_SIZE: usize = 3; +const SEED: u64 = 42; +const NULL_DENSITY: f64 = 0.1; + +fn criterion_benchmark(c: &mut Criterion) { + bench_array_to_string(c, "array_to_string_int64", create_int64_list_array); + bench_array_to_string(c, "array_to_string_float64", create_float64_list_array); + bench_array_to_string(c, "array_to_string_string", create_string_list_array); + bench_array_to_string( + c, + "array_to_string_nested_int64", + create_nested_int64_list_array, + ); +} + +fn bench_array_to_string( + c: &mut Criterion, + group_name: &str, + make_array: impl Fn(usize) -> ArrayRef, +) { + let mut group = c.benchmark_group(group_name); + + for &array_size in ARRAY_SIZES { + let list_array = make_array(array_size); + let args = vec![ + ColumnarValue::Array(list_array.clone()), + ColumnarValue::Scalar(ScalarValue::Utf8(Some(",".to_string()))), + ]; + let arg_fields = vec![ + Field::new("array", list_array.data_type().clone(), true).into(), + Field::new("delimiter", DataType::Utf8, false).into(), + ]; + + group.bench_with_input( + BenchmarkId::from_parameter(array_size), + &array_size, + |b, _| { + let udf = ArrayToString::new(); + b.iter(|| { + black_box( + udf.invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + arg_fields: arg_fields.clone(), + number_rows: NUM_ROWS, + return_field: Field::new("result", DataType::Utf8, true) + .into(), + config_options: Arc::new(ConfigOptions::default()), + }) + .unwrap(), + ) + }) + }, + ); + } + + group.finish(); +} + +fn create_int64_list_array(array_size: usize) -> ArrayRef { + let mut rng = StdRng::seed_from_u64(SEED); + let values = (0..NUM_ROWS * array_size) + .map(|_| { + if rng.random::() < NULL_DENSITY { + None + } else { + Some(rng.random_range(0..1000)) + } + }) + .collect::(); + let offsets = (0..=NUM_ROWS) + .map(|i| (i * array_size) as i32) + .collect::>(); + + Arc::new( + ListArray::try_new( + Arc::new(Field::new("item", DataType::Int64, true)), + OffsetBuffer::new(offsets.into()), + Arc::new(values), + None, + ) + .unwrap(), + ) +} + +fn create_nested_int64_list_array(array_size: usize) -> ArrayRef { + let inner = create_int64_list_array(array_size); + let inner_rows = NUM_ROWS; + let outer_rows = inner_rows / NESTED_ARRAY_SIZE; + let offsets = (0..=outer_rows) + .map(|i| (i * NESTED_ARRAY_SIZE) as i32) + .collect::>(); + Arc::new( + ListArray::try_new( + Arc::new(Field::new("item", inner.data_type().clone(), true)), + OffsetBuffer::new(offsets.into()), + inner, + None, + ) + .unwrap(), + ) +} + +fn create_float64_list_array(array_size: usize) -> ArrayRef { + let mut rng = StdRng::seed_from_u64(SEED); + let values = (0..NUM_ROWS * array_size) + .map(|_| { + if rng.random::() < NULL_DENSITY { + None + } else { + Some(rng.random_range(-1000.0..1000.0)) + } + }) + .collect::(); + let offsets = (0..=NUM_ROWS) + .map(|i| (i * array_size) as i32) + .collect::>(); + + Arc::new( + ListArray::try_new( + Arc::new(Field::new("item", DataType::Float64, true)), + OffsetBuffer::new(offsets.into()), + Arc::new(values), + None, + ) + .unwrap(), + ) +} + +fn create_string_list_array(array_size: usize) -> ArrayRef { + let mut rng = StdRng::seed_from_u64(SEED); + let values = (0..NUM_ROWS * array_size) + .map(|_| { + if rng.random::() < NULL_DENSITY { + None + } else { + Some(format!("value_{}", rng.random_range(0..100))) + } + }) + .collect::(); + let offsets = (0..=NUM_ROWS) + .map(|i| (i * array_size) as i32) + .collect::>(); + + Arc::new( + ListArray::try_new( + Arc::new(Field::new("item", DataType::Utf8, true)), + OffsetBuffer::new(offsets.into()), + Arc::new(values), + None, + ) + .unwrap(), + ) +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/datafusion/functions-nested/src/string.rs b/datafusion/functions-nested/src/string.rs index c296f1969e25..8aabc4930956 100644 --- a/datafusion/functions-nested/src/string.rs +++ b/datafusion/functions-nested/src/string.rs @@ -29,6 +29,7 @@ use datafusion_common::utils::ListCoercion; use datafusion_common::{DataFusionError, Result, not_impl_err}; use std::any::Any; +use std::fmt::Write; use crate::utils::make_scalar_function; use arrow::array::{ @@ -36,7 +37,7 @@ use arrow::array::{ builder::{ArrayBuilder, LargeStringBuilder, StringViewBuilder}, cast::AsArray, }; -use arrow::compute::cast; +use arrow::compute::{can_cast_types, cast}; use arrow::datatypes::DataType::{ Dictionary, FixedSizeList, LargeList, LargeUtf8, List, Null, Utf8, Utf8View, }; @@ -54,69 +55,6 @@ use datafusion_functions::downcast_arg; use datafusion_macros::user_doc; use std::sync::Arc; -macro_rules! call_array_function { - ($DATATYPE:expr, false) => { - match $DATATYPE { - DataType::Utf8 => array_function!(StringArray), - DataType::Utf8View => array_function!(StringViewArray), - DataType::LargeUtf8 => array_function!(LargeStringArray), - DataType::Boolean => array_function!(BooleanArray), - DataType::Float32 => array_function!(Float32Array), - DataType::Float64 => array_function!(Float64Array), - DataType::Int8 => array_function!(Int8Array), - DataType::Int16 => array_function!(Int16Array), - DataType::Int32 => array_function!(Int32Array), - DataType::Int64 => array_function!(Int64Array), - DataType::UInt8 => array_function!(UInt8Array), - DataType::UInt16 => array_function!(UInt16Array), - DataType::UInt32 => array_function!(UInt32Array), - DataType::UInt64 => array_function!(UInt64Array), - dt => not_impl_err!("Unsupported data type in array_to_string: {dt}"), - } - }; - ($DATATYPE:expr, $INCLUDE_LIST:expr) => {{ - match $DATATYPE { - DataType::List(_) => array_function!(ListArray), - DataType::Utf8 => array_function!(StringArray), - DataType::Utf8View => array_function!(StringViewArray), - DataType::LargeUtf8 => array_function!(LargeStringArray), - DataType::Boolean => array_function!(BooleanArray), - DataType::Float32 => array_function!(Float32Array), - DataType::Float64 => array_function!(Float64Array), - DataType::Int8 => array_function!(Int8Array), - DataType::Int16 => array_function!(Int16Array), - DataType::Int32 => array_function!(Int32Array), - DataType::Int64 => array_function!(Int64Array), - DataType::UInt8 => array_function!(UInt8Array), - DataType::UInt16 => array_function!(UInt16Array), - DataType::UInt32 => array_function!(UInt32Array), - DataType::UInt64 => array_function!(UInt64Array), - dt => not_impl_err!("Unsupported data type in array_to_string: {dt}"), - } - }}; -} - -macro_rules! to_string { - ($ARG:expr, $ARRAY:expr, $DELIMITER:expr, $NULL_STRING:expr, $WITH_NULL_STRING:expr, $ARRAY_TYPE:ident) => {{ - let arr = downcast_arg!($ARRAY, $ARRAY_TYPE); - for x in arr { - match x { - Some(x) => { - $ARG.push_str(&x.to_string()); - $ARG.push_str($DELIMITER); - } - None => { - if $WITH_NULL_STRING { - $ARG.push_str($NULL_STRING); - $ARG.push_str($DELIMITER); - } - } - } - } - Ok($ARG) - }}; -} - // Create static instances of ScalarUDFs for each function make_udf_expr_and_func!( ArrayToString, @@ -145,7 +83,7 @@ make_udf_expr_and_func!( argument(name = "delimiter", description = "Array element separator."), argument( name = "null_string", - description = "Optional. String to replace null values in the array. If not provided, nulls will be handled by default behavior." + description = "Optional. String to use for null values in the output. If not provided, nulls will be omitted." ) )] #[derive(Debug, PartialEq, Eq, Hash)] @@ -347,8 +285,8 @@ fn array_to_string_inner(args: &[ArrayRef]) -> Result { } }; - let null_strings = if args.len() == 3 { - Some(match args[2].data_type() { + let null_strings: Vec> = if args.len() == 3 { + match args[2].data_type() { Utf8 => args[2].as_string::().iter().collect(), Utf8View => args[2].as_string_view().iter().collect(), LargeUtf8 => args[2].as_string::().iter().collect(), @@ -357,166 +295,247 @@ fn array_to_string_inner(args: &[ArrayRef]) -> Result { "unsupported type for third argument to array_to_string function as {other:?}" ); } - }) + } } else { - None + // If `null_strings` is not specified, we treat it as equivalent to + // explicitly passing a NULL value for `null_strings` in every row. + vec![None; args[0].len()] }; - /// Creates a single string from single element of a ListArray (which is - /// itself another Array) - fn compute_array_to_string<'a>( - arg: &'a mut String, - arr: &ArrayRef, - delimiter: String, - null_string: String, - with_null_string: bool, - ) -> Result<&'a mut String> { - match arr.data_type() { - List(..) => { - let list_array = as_list_array(&arr)?; - for i in 0..list_array.len() { - if !list_array.is_null(i) { - compute_array_to_string( - arg, - &list_array.value(i), - delimiter.clone(), - null_string.clone(), - with_null_string, - )?; - } else if with_null_string { - arg.push_str(&null_string); - arg.push_str(&delimiter); - } - } + let string_arr = match arr.data_type() { + List(_) => { + let list_array = as_list_array(&arr)?; + generate_string_array::(list_array, &delimiters, &null_strings)? + } + LargeList(_) => { + let list_array = as_large_list_array(&arr)?; + generate_string_array::(list_array, &delimiters, &null_strings)? + } + // Signature guards against this arm + _ => return exec_err!("array_to_string expects list as first argument"), + }; - Ok(arg) - } - FixedSizeList(..) => { - let list_array = as_fixed_size_list_array(&arr)?; - - for i in 0..list_array.len() { - if !list_array.is_null(i) { - compute_array_to_string( - arg, - &list_array.value(i), - delimiter.clone(), - null_string.clone(), - with_null_string, - )?; - } else if with_null_string { - arg.push_str(&null_string); - arg.push_str(&delimiter); - } - } + Ok(Arc::new(string_arr)) +} - Ok(arg) - } - LargeList(..) => { - let list_array = as_large_list_array(&arr)?; - for i in 0..list_array.len() { - if !list_array.is_null(i) { - compute_array_to_string( - arg, - &list_array.value(i), - delimiter.clone(), - null_string.clone(), - with_null_string, - )?; - } else if with_null_string { - arg.push_str(&null_string); - arg.push_str(&delimiter); +fn generate_string_array( + list_arr: &GenericListArray, + delimiters: &[Option<&str>], + null_strings: &[Option<&str>], +) -> Result { + let mut builder = StringBuilder::with_capacity(list_arr.len(), 0); + let mut buf = String::new(); + + for ((arr, &delimiter), &null_string) in list_arr + .iter() + .zip(delimiters.iter()) + .zip(null_strings.iter()) + { + let (Some(arr), Some(delimiter)) = (arr, delimiter) else { + builder.append_null(); + continue; + }; + + buf.clear(); + let mut first = true; + compute_array_to_string(&mut buf, &arr, delimiter, null_string, &mut first)?; + builder.append_value(&buf); + } + + Ok(builder.finish()) +} + +fn compute_array_to_string( + buf: &mut String, + arr: &ArrayRef, + delimiter: &str, + null_string: Option<&str>, + first: &mut bool, +) -> Result<()> { + // Handle lists by recursing on each list element. + macro_rules! handle_list { + ($list_array:expr) => { + for i in 0..$list_array.len() { + if !$list_array.is_null(i) { + compute_array_to_string( + buf, + &$list_array.value(i), + delimiter, + null_string, + first, + )?; + } else if let Some(ns) = null_string { + if *first { + *first = false; + } else { + buf.push_str(delimiter); } + buf.push_str(ns); } + } + }; + } - Ok(arg) + match arr.data_type() { + List(..) => { + let list_array = as_list_array(arr)?; + handle_list!(list_array); + Ok(()) + } + FixedSizeList(..) => { + let list_array = as_fixed_size_list_array(arr)?; + handle_list!(list_array); + Ok(()) + } + LargeList(..) => { + let list_array = as_large_list_array(arr)?; + handle_list!(list_array); + Ok(()) + } + Dictionary(_key_type, value_type) => { + // Call cast to unwrap the dictionary. This could be optimized if we wanted + // to accept the overhead of extra code + let values = cast(arr, value_type.as_ref()).map_err(|e| { + DataFusionError::from(e) + .context("Casting dictionary to values in compute_array_to_string") + })?; + compute_array_to_string(buf, &values, delimiter, null_string, first) + } + Null => Ok(()), + data_type => { + macro_rules! str_leaf { + ($ARRAY_TYPE:ident) => { + write_leaf_to_string( + buf, + downcast_arg!(arr, $ARRAY_TYPE), + delimiter, + null_string, + first, + |buf, x: &str| buf.push_str(x), + ) + }; } - Dictionary(_key_type, value_type) => { - // Call cast to unwrap the dictionary. This could be optimized if we wanted - // to accept the overhead of extra code - let values = cast(&arr, value_type.as_ref()).map_err(|e| { - DataFusionError::from(e).context( - "Casting dictionary to values in compute_array_to_string", + macro_rules! bool_leaf { + ($ARRAY_TYPE:ident) => { + write_leaf_to_string( + buf, + downcast_arg!(arr, $ARRAY_TYPE), + delimiter, + null_string, + first, + |buf, x: bool| { + if x { + buf.push_str("true"); + } else { + buf.push_str("false"); + } + }, ) - })?; - compute_array_to_string( - arg, - &values, - delimiter, - null_string, - with_null_string, - ) + }; } - Null => Ok(arg), - data_type => { - macro_rules! array_function { - ($ARRAY_TYPE:ident) => { - to_string!( - arg, - arr, - &delimiter, - &null_string, - with_null_string, - $ARRAY_TYPE - ) - }; - } - call_array_function!(data_type, false) + macro_rules! int_leaf { + ($ARRAY_TYPE:ident) => { + write_leaf_to_string( + buf, + downcast_arg!(arr, $ARRAY_TYPE), + delimiter, + null_string, + first, + |buf, x| { + let mut itoa_buf = itoa::Buffer::new(); + buf.push_str(itoa_buf.format(x)); + }, + ) + }; } - } - } - - fn generate_string_array( - list_arr: &GenericListArray, - delimiters: &[Option<&str>], - null_strings: &Option>>, - ) -> Result { - let mut res: Vec> = Vec::new(); - for (i, (arr, &delimiter)) in list_arr.iter().zip(delimiters.iter()).enumerate() { - if let (Some(arr), Some(delimiter)) = (arr, delimiter) { - let (null_string, with_null_string) = match null_strings { - Some(ns) => match ns[i] { - Some(s) => (s.to_string(), true), - None => (String::new(), false), - }, - None => (String::new(), false), + macro_rules! float_leaf { + ($ARRAY_TYPE:ident) => { + write_leaf_to_string( + buf, + downcast_arg!(arr, $ARRAY_TYPE), + delimiter, + null_string, + first, + |buf, x| { + // TODO: Consider switching to a more efficient + // floating point display library (e.g., ryu). This + // might result in some differences in the output + // format, however. + write!(buf, "{}", x).unwrap(); + }, + ) }; - let mut arg = String::from(""); - let s = compute_array_to_string( - &mut arg, - &arr, - delimiter.to_string(), - null_string, - with_null_string, - )? - .clone(); - - if let Some(s) = s.strip_suffix(delimiter) { - res.push(Some(s.to_string())); - } else { - res.push(Some(s)); + } + match data_type { + Utf8 => str_leaf!(StringArray), + Utf8View => str_leaf!(StringViewArray), + LargeUtf8 => str_leaf!(LargeStringArray), + DataType::Boolean => bool_leaf!(BooleanArray), + DataType::Float32 => float_leaf!(Float32Array), + DataType::Float64 => float_leaf!(Float64Array), + DataType::Int8 => int_leaf!(Int8Array), + DataType::Int16 => int_leaf!(Int16Array), + DataType::Int32 => int_leaf!(Int32Array), + DataType::Int64 => int_leaf!(Int64Array), + DataType::UInt8 => int_leaf!(UInt8Array), + DataType::UInt16 => int_leaf!(UInt16Array), + DataType::UInt32 => int_leaf!(UInt32Array), + DataType::UInt64 => int_leaf!(UInt64Array), + data_type if can_cast_types(data_type, &Utf8) => { + let str_arr = cast(arr, &Utf8).map_err(|e| { + DataFusionError::from(e) + .context("Casting to string in array_to_string") + })?; + return compute_array_to_string( + buf, + &str_arr, + delimiter, + null_string, + first, + ); + } + data_type => { + return not_impl_err!( + "Unsupported data type in array_to_string: {data_type}" + ); } - } else { - res.push(None); } + Ok(()) } - - Ok(StringArray::from(res)) } +} - let string_arr = match arr.data_type() { - List(_) => { - let list_array = as_list_array(&arr)?; - generate_string_array::(list_array, &delimiters, &null_strings)? +/// Appends the string representation of each element in a leaf (non-list) +/// array to `buf`, separated by `delimiter`. Null elements are rendered +/// using `null_string` if provided, or skipped otherwise. The `append` +/// closure controls how each non-null element is written to the buffer. +fn write_leaf_to_string<'a, A, T>( + buf: &mut String, + arr: &'a A, + delimiter: &str, + null_string: Option<&str>, + first: &mut bool, + append: impl Fn(&mut String, T), +) where + &'a A: IntoIterator>, +{ + for x in arr { + // Skip nulls when no null_string is provided + if x.is_none() && null_string.is_none() { + continue; } - LargeList(_) => { - let list_array = as_large_list_array(&arr)?; - generate_string_array::(list_array, &delimiters, &null_strings)? + + if *first { + *first = false; + } else { + buf.push_str(delimiter); } - // Signature guards against this arm - _ => return exec_err!("array_to_string expects list as first argument"), - }; - Ok(Arc::new(string_arr)) + match x { + Some(x) => append(buf, x), + None => buf.push_str(null_string.unwrap()), + } + } } /// String_to_array SQL function diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 5113b9718c4e..17475c6a11d8 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -5157,6 +5157,33 @@ select array_to_string(arrow_cast(make_array('h', NULL, NULL, NULL, 'o'), 'Fixed ---- h,-,-,-,o nil-2-nil-4-5 1|0|3 +# array_to_string float formatting: special values and longer decimals +query TTT +select + array_to_string(make_array(CAST('NaN' AS DOUBLE), CAST('Infinity' AS DOUBLE), CAST('-Infinity' AS DOUBLE), CAST('0.30000000000000004' AS DOUBLE), CAST('1.2345678901234567' AS DOUBLE)), '|'), + array_to_string(arrow_cast(make_array(CAST('NaN' AS DOUBLE), CAST('Infinity' AS DOUBLE), CAST('-Infinity' AS DOUBLE), CAST('0.30000000000000004' AS DOUBLE), CAST('1.2345678901234567' AS DOUBLE)), 'LargeList(Float64)'), '|'), + array_to_string(arrow_cast(make_array(CAST('NaN' AS DOUBLE), CAST('Infinity' AS DOUBLE), CAST('-Infinity' AS DOUBLE), CAST('0.30000000000000004' AS DOUBLE), CAST('1.2345678901234567' AS DOUBLE)), 'FixedSizeList(5, Float64)'), '|'); +---- +NaN|inf|-inf|0.30000000000000004|1.2345678901234567 NaN|inf|-inf|0.30000000000000004|1.2345678901234567 NaN|inf|-inf|0.30000000000000004|1.2345678901234567 + +# array_to_string float formatting: scientific-notation inputs +query T +select array_to_string( + make_array( + CAST('1E20' AS DOUBLE), + CAST('-1e+20' AS DOUBLE), + CAST('6.02214076e23' AS DOUBLE), + CAST('1.2345e6' AS DOUBLE), + CAST('1e-5' AS DOUBLE), + CAST('-1e-5' AS DOUBLE), + CAST('9.1093837015e-31' AS DOUBLE), + CAST('-2.5e-4' AS DOUBLE) + ), + '|' +); +---- +100000000000000000000|-100000000000000000000|602214076000000000000000|1234500|0.00001|-0.00001|0.00000000000000000000000000000091093837015|-0.00025 + query T select array_to_string(arrow_cast([arrow_cast([NULL, 'a'], 'FixedSizeList(2, Utf8)'), NULL], 'FixedSizeList(2, FixedSizeList(2, Utf8))'), ',', '-'); ---- @@ -5267,6 +5294,68 @@ MISSING,5,MISSING statement ok DROP TABLE test_null_str_col; +# array_to_string with decimal values +query T +select array_to_string(arrow_cast(make_array(1.5, NULL, 3.14), 'List(Decimal128(10, 2))'), ',', 'N'); +---- +1.50,N,3.14 + +# array_to_string with date values +query T +select array_to_string(arrow_cast(make_array('2024-01-15', '2024-06-30', '2024-12-25'), 'List(Date32)'), ','); +---- +2024-01-15,2024-06-30,2024-12-25 + +query T +select array_to_string(arrow_cast(make_array('2024-01-15', NULL, '2024-12-25'), 'List(Date32)'), ',', 'N'); +---- +2024-01-15,N,2024-12-25 + +# array_to_string with timestamp values +query T +select array_to_string(make_array(arrow_cast('2024-01-15T10:30:00', 'Timestamp(Second, None)'), arrow_cast('2024-06-30T15:45:00', 'Timestamp(Second, None)')), '|'); +---- +2024-01-15T10:30:00|2024-06-30T15:45:00 + +query T +select array_to_string(make_array(arrow_cast('2024-01-15T10:30:00', 'Timestamp(Millisecond, None)'), arrow_cast('2024-06-30T15:45:00', 'Timestamp(Millisecond, None)')), '|'); +---- +2024-01-15T10:30:00|2024-06-30T15:45:00 + +query T +select array_to_string(make_array(arrow_cast('2024-01-15T10:30:00', 'Timestamp(Microsecond, None)'), arrow_cast('2024-06-30T15:45:00', 'Timestamp(Microsecond, None)')), '|'); +---- +2024-01-15T10:30:00|2024-06-30T15:45:00 + +query T +select array_to_string(make_array(arrow_cast('2024-01-15T10:30:00', 'Timestamp(Nanosecond, None)'), arrow_cast('2024-06-30T15:45:00', 'Timestamp(Nanosecond, None)')), '|'); +---- +2024-01-15T10:30:00|2024-06-30T15:45:00 + +# array_to_string with time values +query T +select array_to_string(make_array(arrow_cast('10:30:00', 'Time32(Second)'), arrow_cast('15:45:00', 'Time32(Second)')), ','); +---- +10:30:00,15:45:00 + +query T +select array_to_string(make_array(arrow_cast('10:30:00', 'Time64(Microsecond)'), arrow_cast('15:45:00', 'Time64(Microsecond)')), ','); +---- +10:30:00,15:45:00 + +# array_to_string with interval values +query T +select array_to_string(make_array(interval '1 year 2 months', interval '3 days 4 hours'), ','); +---- +14 mons,3 days 4 hours + +# array_to_string with duration values +query T +select array_to_string(make_array(arrow_cast(1000, 'Duration(Millisecond)'), arrow_cast(2000, 'Duration(Millisecond)')), ','); +---- +PT1S,PT2S + + ## cardinality # cardinality scalar function diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index ee78ce8f9c15..254151c2c20e 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -4212,7 +4212,7 @@ array_to_string(array, delimiter[, null_string]) - **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. - **delimiter**: Array element separator. -- **null_string**: Optional. String to replace null values in the array. If not provided, nulls will be handled by default behavior. +- **null_string**: Optional. String to use for null values in the output. If not provided, nulls will be omitted. #### Example