diff --git a/vortex-array/src/aggregate_fn/fns/is_constant/mod.rs b/vortex-array/src/aggregate_fn/fns/is_constant/mod.rs index b14b2a050f8..d1dd6bef39c 100644 --- a/vortex-array/src/aggregate_fn/fns/is_constant/mod.rs +++ b/vortex-array/src/aggregate_fn/fns/is_constant/mod.rs @@ -74,7 +74,7 @@ fn arrays_value_equal(a: &ArrayRef, b: &ArrayRef, ctx: &mut ExecutionCtx) -> Vor // Compare values element-wise. Result is null where both inputs are null, // true/false where both are valid. let eq_result = a.binary(b.clone(), Operator::Eq)?; - let eq_result = eq_result.execute::(ctx)?; + let eq_result = eq_result.fill_null(false)?.execute::(ctx)?; Ok(eq_result.true_count() == valid_count) } diff --git a/vortex-array/src/mask.rs b/vortex-array/src/mask.rs index ab035670e24..09032aabbd0 100644 --- a/vortex-array/src/mask.rs +++ b/vortex-array/src/mask.rs @@ -1,8 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::BitAnd; - use vortex_error::VortexResult; use vortex_error::vortex_bail; use vortex_mask::Mask; @@ -12,19 +10,22 @@ use crate::Executable; use crate::ExecutionCtx; use crate::IntoArray; use crate::arrays::BoolArray; -use crate::arrays::Constant; use crate::columnar::Columnar; use crate::dtype::DType; +use crate::dtype::Nullability; impl Executable for Mask { + /// Executes a boolean array into a [`Mask`]. + /// + /// The array must have a non-nullable boolean dtype. To execute a nullable boolean array, + /// coercing null elements to `false`, first call + /// [`ArrayRef::fill_null(false)`](crate::builtins::ArrayBuiltins::fill_null). fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult { - if !matches!(array.dtype(), DType::Bool(_)) { - vortex_bail!("Mask array must have boolean dtype, not {}", array.dtype()); - } - - if let Some(constant) = array.as_opt::() { - let mask_value = constant.scalar().as_bool().value().unwrap_or(false); - return Ok(Mask::new(array.len(), mask_value)); + if !matches!(array.dtype(), DType::Bool(Nullability::NonNullable)) { + vortex_bail!( + "Mask array must have boolean(NonNullable) dtype, not {}", + array.dtype() + ); } let array_len = array.len(); @@ -34,16 +35,60 @@ impl Executable for Mask { } Columnar::Canonical(a) => { let bool = a.into_array().execute::(ctx)?; - let mask = bool - .as_ref() - .validity()? - .execute_mask(bool.as_ref().len(), ctx)?; - let bits = bool.into_bit_buffer(); - // To handle nullable boolean arrays, we treat nulls as false in the mask. - // TODO(ngates): is this correct? Feels like we should just force the caller to - // pass non-nullable boolean arrays. - mask.bitand(&Mask::from(bits)) + Mask::from(bool.into_bit_buffer()) } }) } } + +#[cfg(test)] +mod tests { + use vortex_error::VortexResult; + use vortex_mask::Mask; + + use crate::ExecutionCtx; + use crate::IntoArray; + use crate::LEGACY_SESSION; + use crate::VortexSessionExecute; + use crate::arrays::BoolArray; + use crate::arrays::ConstantArray; + use crate::builtins::ArrayBuiltins; + use crate::dtype::DType; + use crate::dtype::Nullability; + use crate::scalar::Scalar; + + fn ctx() -> ExecutionCtx { + LEGACY_SESSION.create_execution_ctx() + } + + #[test] + fn mask_non_nullable() -> VortexResult<()> { + let array = BoolArray::from_iter([true, false, true]).into_array(); + let mask = array.execute::(&mut ctx())?; + assert_eq!(mask, Mask::from_iter([true, false, true])); + Ok(()) + } + + #[test] + fn mask_rejects_nullable() { + let array = BoolArray::from_iter([Some(true), None]).into_array(); + assert!(array.execute::(&mut ctx()).is_err()); + } + + #[test] + fn fill_null_then_mask_coerces_nulls() -> VortexResult<()> { + let array = BoolArray::from_iter([Some(true), None, Some(false), None]).into_array(); + let mask = array.fill_null(false)?.execute::(&mut ctx())?; + assert_eq!(mask, Mask::from_iter([true, false, false, false])); + Ok(()) + } + + #[test] + fn fill_null_then_mask_null_constant() -> VortexResult<()> { + let array = + ConstantArray::new(Scalar::null(DType::Bool(Nullability::Nullable)), 4).into_array(); + let mask = array.fill_null(false)?.execute::(&mut ctx())?; + assert_eq!(mask, Mask::new_false(4)); + Ok(()) + } +} diff --git a/vortex-cuda/src/layout.rs b/vortex-cuda/src/layout.rs index 20aa26e0b3b..9e6111fc371 100644 --- a/vortex-cuda/src/layout.rs +++ b/vortex-cuda/src/layout.rs @@ -22,6 +22,7 @@ use vortex::array::MaskFuture; use vortex::array::ProstMetadata; use vortex::array::VortexSessionExecute; use vortex::array::arrays::Constant; +use vortex::array::builtins::ArrayBuiltins; use vortex::array::expr::Expression; use vortex::array::expr::stats::Precision; use vortex::array::expr::stats::Stat; @@ -331,12 +332,12 @@ impl LayoutReader for CudaFlatReader { let array = array.apply(&expr)?; let array = array.filter(mask.clone())?; let mut ctx = session.create_execution_ctx(); - let array_mask = array.execute::(&mut ctx)?; + let array_mask = array.fill_null(false)?.execute::(&mut ctx)?; mask.intersect_by_rank(&array_mask) } else { let array = array.apply(&expr)?; let mut ctx = session.create_execution_ctx(); - let array_mask = array.execute::(&mut ctx)?; + let array_mask = array.fill_null(false)?.execute::(&mut ctx)?; mask.bitand(&array_mask) }; diff --git a/vortex-layout/src/layouts/dict/reader.rs b/vortex-layout/src/layouts/dict/reader.rs index 002b4b1e902..6c8d91706db 100644 --- a/vortex-layout/src/layouts/dict/reader.rs +++ b/vortex-layout/src/layouts/dict/reader.rs @@ -16,6 +16,7 @@ use vortex_array::MaskFuture; use vortex_array::VortexSessionExecute; use vortex_array::arrays::DictArray; use vortex_array::arrays::SharedArray; +use vortex_array::builtins::ArrayBuiltins; use vortex_array::dtype::DType; use vortex_array::dtype::FieldMask; use vortex_array::expr::Expression; @@ -216,7 +217,10 @@ impl LayoutReader for DictReader { let mask = mask.await?; let mut ctx = session.create_execution_ctx(); - let dict_mask = values.take(codes)?.execute::(&mut ctx)?; + let dict_mask = values + .take(codes)? + .fill_null(false)? + .execute::(&mut ctx)?; Ok(mask.bitand(&dict_mask)) })) diff --git a/vortex-layout/src/layouts/flat/reader.rs b/vortex-layout/src/layouts/flat/reader.rs index 3f3527aaa6a..e03ffc20e1a 100644 --- a/vortex-layout/src/layouts/flat/reader.rs +++ b/vortex-layout/src/layouts/flat/reader.rs @@ -11,6 +11,7 @@ use tracing::trace; use vortex_array::ArrayRef; use vortex_array::MaskFuture; use vortex_array::VortexSessionExecute; +use vortex_array::builtins::ArrayBuiltins; use vortex_array::dtype::DType; use vortex_array::dtype::FieldMask; use vortex_array::expr::Expression; @@ -156,14 +157,14 @@ impl LayoutReader for FlatReader { let array = array.apply(&expr)?; let array = array.filter(mask.clone())?; let mut ctx = session.create_execution_ctx(); - let array_mask = array.execute::(&mut ctx)?; + let array_mask = array.fill_null(false)?.execute::(&mut ctx)?; mask.intersect_by_rank(&array_mask) } else { // Run over the full array, with a simpler bitand at the end. let array = array.apply(&expr)?; let mut ctx = session.create_execution_ctx(); - let array_mask = array.execute::(&mut ctx)?; + let array_mask = array.fill_null(false)?.execute::(&mut ctx)?; mask.bitand(&array_mask) }; diff --git a/vortex-layout/src/layouts/partitioned.rs b/vortex-layout/src/layouts/partitioned.rs index d5ac3d44c55..cd927bbc074 100644 --- a/vortex-layout/src/layouts/partitioned.rs +++ b/vortex-layout/src/layouts/partitioned.rs @@ -11,6 +11,7 @@ use vortex_array::IntoArray; use vortex_array::MaskFuture; use vortex_array::VortexSessionExecute; use vortex_array::arrays::StructArray; +use vortex_array::builtins::ArrayBuiltins; use vortex_array::dtype::DType; use vortex_array::dtype::Nullability; use vortex_array::expr::Expression; @@ -90,7 +91,10 @@ impl PartitionedExprEval

for PartitionedExpr

{ .into_array(); let mut ctx = session.create_execution_ctx(); - let root_mask = root_scope.apply(&self.root)?.execute::(&mut ctx)?; + let root_mask = root_scope + .apply(&self.root)? + .fill_null(false)? + .execute::(&mut ctx)?; let mask = mask.bitand(&root_mask); diff --git a/vortex-layout/src/layouts/row_idx/mod.rs b/vortex-layout/src/layouts/row_idx/mod.rs index 3814c2f8093..2347d05b0bd 100644 --- a/vortex-layout/src/layouts/row_idx/mod.rs +++ b/vortex-layout/src/layouts/row_idx/mod.rs @@ -19,6 +19,7 @@ use vortex_array::Canonical; use vortex_array::IntoArray; use vortex_array::MaskFuture; use vortex_array::VortexSessionExecute; +use vortex_array::builtins::ArrayBuiltins; use vortex_array::dtype::DType; use vortex_array::dtype::FieldMask; use vortex_array::dtype::FieldName; @@ -295,7 +296,10 @@ fn row_idx_mask_future( let array = idx_array(row_offset, &row_range).into_array(); let mut ctx = session.create_execution_ctx(); - let result_mask = array.apply(&expr)?.execute::(&mut ctx)?; + let result_mask = array + .apply(&expr)? + .fill_null(false)? + .execute::(&mut ctx)?; Ok(result_mask.bitand(&mask.await?)) }) diff --git a/vortex-layout/src/layouts/zoned/zone_map.rs b/vortex-layout/src/layouts/zoned/zone_map.rs index 96154e69571..d7a5b7cd769 100644 --- a/vortex-layout/src/layouts/zoned/zone_map.rs +++ b/vortex-layout/src/layouts/zoned/zone_map.rs @@ -17,6 +17,7 @@ use vortex_array::arrays::ConstantArray; use vortex_array::arrays::PrimitiveArray; use vortex_array::arrays::StructArray; use vortex_array::arrays::struct_::StructArrayExt; +use vortex_array::builtins::ArrayBuiltins; use vortex_array::dtype::DType; use vortex_array::dtype::Nullability; use vortex_array::expr::Expression; @@ -123,12 +124,12 @@ impl ZoneMap { let applied = self.array.clone().into_array().apply(&predicate)?; if !contains_row_count(&applied) { - return applied.execute::(&mut ctx); + return applied.fill_null(false)?.execute::(&mut ctx); } let row_count_array = row_count_array(self.zone_len, self.row_count, num_zones)?; let substituted = substitute_row_count(applied, &row_count_array)?; - substituted.execute::(&mut ctx) + substituted.fill_null(false)?.execute::(&mut ctx) } fn lower_stats(&self, predicate: Expression) -> VortexResult {