diff --git a/docs/md/SUMMARY.md b/docs/md/SUMMARY.md index d885d160c3..a6948f642c 100644 --- a/docs/md/SUMMARY.md +++ b/docs/md/SUMMARY.md @@ -54,6 +54,7 @@ - [Overview](./explanation/python.md) - [Installation](./how_to/python/installation.md) - [Loading data into a `Table`](./how_to/python/table.md) + - [`pandas`, `polars` and `pyarrow` integration](./how_to/python/table_data.md) - [Callbacks and events](./how_to/python/callbacks.md) - [Multithreading](./how_to/python/multithreading.md) - [Hosting a WebSocket server](./how_to/python/websocket.md) diff --git a/docs/md/how_to/python/table_data.md b/docs/md/how_to/python/table_data.md new file mode 100644 index 0000000000..8c095cf706 --- /dev/null +++ b/docs/md/how_to/python/table_data.md @@ -0,0 +1,81 @@ +# DataFrame and Arrow Compatibility + +`perspective-python` accepts a `Table` constructor argument from any of the +common Python columnar data libraries. In all three cases, `perspective.table` +(and `Table.update()`) consume the input directly — there is no need to +serialize to Apache Arrow IPC bytes yourself. However, note that Apache Arrow is +still the most efficient way to bulk load data into a `Table`. + +## PyArrow + +```python +import pyarrow as pa +import perspective + +arrow_table = pa.table({ + "int": pa.array([1, 2, 3], type=pa.int64()), + "float": pa.array([1.5, 2.5, 3.5], type=pa.float64()), + "string": pa.array(["a", "b", "c"], type=pa.string()), +}) + +table = perspective.table(arrow_table) +``` + +The same applies to `Table.update()`: + +```python +table.update(arrow_table) +``` + +If you have Arrow data already in IPC format (e.g.
read from disk, received +over the wire, or produced by another tool), pass the raw `bytes` directly — +both stream and file formats are auto-detected: + +```python +with open("data.arrow", "rb") as f: + table = perspective.table(f.read()) +``` + +## Polars + +```python +import polars as pl +import perspective + +df = pl.DataFrame({ + "a": [1, 2, 3, 4, 5], + "b": ["x", "y", "z", "x", "y"], +}) + +table = perspective.table(df) +``` + +Internally, the `DataFrame` is converted to a `pyarrow.Table` before +ingestion, so Polars columns are typed the same way as the equivalent +`pyarrow.Table` input described in the PyArrow section. + +See also Perspective's [Virtual Server support for `polars.DataFrame`](./virtual_server/polars.md). + +## Pandas + +`pandas.DataFrame` is supported via `pyarrow.Table.from_pandas`, which +dictates behavior including type support — see the +[pyarrow pandas docs](https://arrow.apache.org/docs/python/pandas.html) for +details on which pandas dtypes round-trip cleanly. + +```python +from datetime import date, datetime +import numpy as np +import pandas as pd +import perspective + +data = pd.DataFrame({ + "int": np.arange(100), + "float": [i * 1.5 for i in range(100)], + "bool": [True for i in range(100)], + "date": [date.today() for i in range(100)], + "datetime": [datetime.now() for i in range(100)], + "string": [str(i) for i in range(100)], +}) + +table = perspective.table(data, index="float") +``` diff --git a/rust/perspective-client/src/rust/config/expressions.rs b/rust/perspective-client/src/rust/config/expressions.rs index e1756ad64a..7ba2969a34 100644 --- a/rust/perspective-client/src/rust/config/expressions.rs +++ b/rust/perspective-client/src/rust/config/expressions.rs @@ -259,7 +259,7 @@ pub struct CompletionItemSuggestion { } #[doc(hidden)] -pub static COMPLETIONS: [CompletionItemSuggestion; 77] = [ +pub static COMPLETIONS: [CompletionItemSuggestion; 79] = [ CompletionItemSuggestion { label: "var", insert_text: "var ${1:x := 1}", @@ -537,6 +537,16 @@ pub static COMPLETIONS: 
[CompletionItemSuggestion; 77] = [ insert_text: "is_not_null(${1:x})", documentation: "Whether x is not a null value", }, + CompletionItemSuggestion { + label: "coalesce", + insert_text: "coalesce(${1:x}, ${2:y})", + documentation: "Returns the first non-null argument.", + }, + CompletionItemSuggestion { + label: "contains", + insert_text: "contains(${1:x}, ${2:'substr'})", + documentation: "Whether the string column or value contains the literal substring.", + }, CompletionItemSuggestion { label: "not", insert_text: "not(${1:x})", diff --git a/rust/perspective-js/test/js/expressions/numeric.spec.js b/rust/perspective-js/test/js/expressions/numeric.spec.js index e295a04a9d..34895cc2e6 100644 --- a/rust/perspective-js/test/js/expressions/numeric.spec.js +++ b/rust/perspective-js/test/js/expressions/numeric.spec.js @@ -1262,6 +1262,112 @@ function validate_binary_operations(output, expressions, operator) { await table.delete(); }); + test("coalesce returns first non-null arg", async function () { + const table = await perspective.table({ + a: "integer", + b: "integer", + }); + + const view = await table.view({ + expressions: { + coalesce_ab: 'coalesce("a", "b")', + coalesce_ab_default: 'coalesce("a", "b", 99)', + }, + }); + + await table.update({ + a: [1, null, null, 4], + b: [10, 20, null, 40], + }); + + const result = await view.to_columns(); + expect(result["coalesce_ab"]).toEqual([1, 20, null, 4]); + expect(result["coalesce_ab_default"]).toEqual([1, 20, 99, 4]); + await view.delete(); + await table.delete(); + }); + + test("coalesce promotes mixed numeric inputs to float", async function () { + const table = await perspective.table({ + i: "integer", + f: "float", + }); + + const view = await table.view({ + expressions: { + coalesce_if: 'coalesce("i", "f")', + coalesce_if_default: 'coalesce("i", "f", 0.5)', + }, + }); + + await table.update({ + i: [1, null, null, 4], + f: [null, 2.5, null, 4.5], + }); + + const result = await view.to_columns(); + const schema = 
await view.expression_schema(); + expect(schema["coalesce_if"]).toEqual("float"); + expect(schema["coalesce_if_default"]).toEqual("float"); + expect(result["coalesce_if"]).toEqual([1, 2.5, null, 4]); + expect(result["coalesce_if_default"]).toEqual([1, 2.5, 0.5, 4]); + + await view.delete(); + await table.delete(); + }); + + test("coalesce with all-null inputs returns null", async function () { + const table = await perspective.table({ + a: "integer", + b: "integer", + }); + + const view = await table.view({ + expressions: { coalesce_nulls: 'coalesce("a", "b")' }, + }); + + await table.update({ + a: [null, null, null], + b: [null, null, null], + }); + + const result = await view.to_columns(); + expect(result["coalesce_nulls"]).toEqual([null, null, null]); + await view.delete(); + await table.delete(); + }); + + test("coalesce fails validation for incompatible types", async function () { + const table = await perspective.table({ + a: "integer", + b: "string", + }); + + const validated = await table.validate_expressions([ + 'coalesce("a", "b")', + "coalesce(\"a\", 'fallback')", + ]); + + expect(validated.expression_schema).toEqual({}); + expect(validated.errors['coalesce("a", "b")']).toEqual({ + column: 0, + line: 0, + error_message: + "Type Error - inputs do not resolve to a valid expression.", + }); + + expect(validated.errors["coalesce(\"a\", 'fallback')"]).toEqual( + { + column: 0, + line: 0, + error_message: + "Type Error - inputs do not resolve to a valid expression.", + }, + ); + + await table.delete(); + }); + test("null", async function () { const table = await perspective.table({ a: "integer", diff --git a/rust/perspective-js/test/js/expressions/string.spec.js b/rust/perspective-js/test/js/expressions/string.spec.js index 8d1a323709..0575f5a3e9 100644 --- a/rust/perspective-js/test/js/expressions/string.spec.js +++ b/rust/perspective-js/test/js/expressions/string.spec.js @@ -371,6 +371,30 @@ const random_string = ( table.delete(); }); + test("Coalesce 
strings", async function () { + const table = await perspective.table({ + a: ["ABC", null, null, "HIjK", null], + b: ["xyz", "DEF", null, "stu", null], + }); + const view = await table.view({ + expressions: { + coalesce_str: 'coalesce("a", "b", \'N/A\')', + }, + }); + const result = await view.to_columns(); + const schema = await view.expression_schema(); + expect(schema["coalesce_str"]).toEqual("string"); + expect(result["coalesce_str"]).toEqual([ + "ABC", + "DEF", + "N/A", + "HIjK", + "N/A", + ]); + view.delete(); + table.delete(); + }); + test("Concat", async function () { const table = await perspective.table({ a: ["abc", "deeeeef", "fg", "hhs", "abcdefghijk"], diff --git a/rust/perspective-js/test/js/expressions/string_slices.spec.js b/rust/perspective-js/test/js/expressions/string_slices.spec.js new file mode 100644 index 0000000000..d94c244cbc --- /dev/null +++ b/rust/perspective-js/test/js/expressions/string_slices.spec.js @@ -0,0 +1,143 @@ +// ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +// ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃ +// ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃ +// ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃ +// ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃ +// ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ +// ┃ Copyright (c) 2017, the Perspective Authors. ┃ +// ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃ +// ┃ This file is part of the Perspective library, distributed under the terms ┃ +// ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃ +// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ + +import { test, expect } from "@perspective-dev/test"; +import perspective from "../perspective_client"; + +// Concrete use cases from issue #1527 ("Better string functions in +// expressions"). 
The original issue body sketched these in a hypothetical +// dialect; the tests below port them to the dialect Perspective actually +// implements. Differences from the issue's pseudocode: +// +// - `find(str, substr) -> int` does not exist. The closest function is +// `indexof(col, regex, out_vec) -> bool`, which performs a *regex* search, +// writes [start, end] of the first capturing group into `out_vec`, and +// requires the regex to have at least one capturing group (else it +// returns STATUS_CLEAR). The tests therefore wrap the literal char in a +// capturing group: `' '` -> `'( )'`, `','` -> `'(,)'`, `$` -> `'([$])'`. +// - `null()` is not a function call; `null` is a literal. +// - `strlen(s)` -> `length(s)`. +// - `substring(s, start, count)` takes a *count*, not an end index, and +// returns null if `start + count > length(s)`. +// - String literals pass through ExprTK's `cleanup_escapes`, which drops +// unrecognized escape characters (`\s` -> `s`, `\.` -> `.`). + +((perspective) => { + test.describe("Issue 1527 use cases", function () { + test("contains literal substring", async function () { + const table = await perspective.table({ + a: ["abcdef", "xyz", "abXabY", null, "abc"], + }); + + const view = await table.view({ + expressions: { + has_ab: "contains(\"a\", 'ab')", + }, + }); + + const result = await view.to_columns(); + const schema = await view.expression_schema(); + expect(schema["has_ab"]).toEqual("boolean"); + expect(result["has_ab"]).toEqual([true, false, true, null, true]); + view.delete(); + table.delete(); + }); + + // Parse "USD $1000"-style strings into Currency (string) and Value + // (float) columns, tolerant of malformed rows. 
+ test("split currency/value string column", async function () { + const table = await perspective.table({ + "Bad Column": [ + "USD $1000", + "EUR $250", + "malformed", + null, + "GBP $42", + ], + }); + const view = await table.view({ + expressions: { + Currency: `var v[2]; +if (indexof("Bad Column", '( )', v)) { substring("Bad Column", 0, v[0]) } else { null }`, + Value: `var v[2]; +if (indexof("Bad Column", '([$])', v)) { float(substring("Bad Column", v[0] + 1)) } else { null }`, + }, + }); + const result = await view.to_columns(); + const schema = await view.expression_schema(); + expect(schema["Currency"]).toEqual("string"); + expect(schema["Value"]).toEqual("float"); + expect(result["Currency"]).toEqual([ + "USD", + "EUR", + null, + null, + "GBP", + ]); + expect(result["Value"]).toEqual([1000, 250, null, null, 42]); + view.delete(); + table.delete(); + }); + + // Parse "(123, 456)"-style strings into Longitude and Latitude + // float columns. + test("split longitude/latitude string column", async function () { + const table = await perspective.table({ + Coords: [ + "(123, 456)", + "(1.5, -2.25)", + "broken", + null, + "(0, 0)", + ], + }); + const view = await table.view({ + expressions: { + Longitude: `var v[2]; +if (indexof("Coords", '(,)', v)) { float(substring("Coords", 1, v[0] - 1)) } else { null }`, + Latitude: `var v[2]; +if (indexof("Coords", '(,)', v)) { float(substring("Coords", v[0] + 1, length("Coords") - v[0] - 2)) } else { null }`, + }, + }); + const result = await view.to_columns(); + const schema = await view.expression_schema(); + expect(schema["Longitude"]).toEqual("float"); + expect(schema["Latitude"]).toEqual("float"); + expect(result["Longitude"]).toEqual([123, 1.5, null, null, 0]); + expect(result["Latitude"]).toEqual([456, -2.25, null, null, 0]); + view.delete(); + table.delete(); + }); + + // Normalize spelling variants by stripping dots and whitespace. 
+ test("replace_all regex strips dots/whitespace", async function () { + const table = await perspective.table({ + State: ["NC", "N.C.", "N. C.", "N .C.", "VA"], + }); + const view = await table.view({ + expressions: { + Normalized: `replace_all("State", '[. ]', '')`, + }, + }); + const result = await view.to_columns(); + expect(result["Normalized"]).toEqual([ + "NC", + "NC", + "NC", + "NC", + "VA", + ]); + view.delete(); + table.delete(); + }); + }); +})(perspective); diff --git a/rust/perspective-python/perspective/tests/conftest.py b/rust/perspective-python/perspective/tests/conftest.py index f279d3a7b9..d2ab3da542 100644 --- a/rust/perspective-python/perspective/tests/conftest.py +++ b/rust/perspective-python/perspective/tests/conftest.py @@ -109,26 +109,17 @@ def make_arrow(names, data, types=None, legacy=False): @staticmethod def make_arrow_from_pandas(df, schema=None, legacy=False): - """Create an arrow binary from a Pandas dataframe. + """Create a pyarrow Table from a Pandas dataframe. Args: df (:obj:`pandas.DataFrame`) schema (:obj:`pyarrow.Schema`) - legacy (bool): if True, use legacy IPC format (pre-pyarrow 0.15). Defaults to False. + legacy (bool): unused; retained for backwards compatibility. Returns: - bytes : a bytes object containing the arrow-serialized output. 
+ pyarrow.Table """ - stream = pa.BufferOutputStream() - table = pa.Table.from_pandas(df, schema=schema) - - writer = pa.RecordBatchStreamWriter( - stream, table.schema - ) - - writer.write_table(table) - writer.close() - return stream.getvalue().to_pybytes() + return pa.Table.from_pandas(df, schema=schema) @staticmethod def make_dictionary_arrow(names, data, types=None, legacy=False): diff --git a/rust/perspective-python/perspective/tests/table/test_table_arrow.py b/rust/perspective-python/perspective/tests/table/test_table_arrow.py index a8207d92e4..27f641cb1c 100644 --- a/rust/perspective-python/perspective/tests/table/test_table_arrow.py +++ b/rust/perspective-python/perspective/tests/table/test_table_arrow.py @@ -15,10 +15,8 @@ import pandas as pd from perspective.tests.conftest import Util import pyarrow as pa -import pyarrow.ipc as ipc from datetime import date, datetime import perspective as psp -import io client = psp.Server().new_local_client() @@ -49,14 +47,23 @@ ALL_INTEGERS_TABLE = pa.Table.from_pydict(ALL_INTEGERS_DATA) -bytes_io = io.BytesIO() -with ipc.new_stream(bytes_io, ALL_INTEGERS_TABLE.schema) as stream: - stream.write_table(ALL_INTEGERS_TABLE) -ALL_INTEGERS_ARROW = bytes_io.getvalue() class TestTableArrow(object): def test_table_with_integer_types(self): - tbl = Table(ALL_INTEGERS_ARROW) + tbl = Table(ALL_INTEGERS_TABLE) + assert tbl.size() == 3 + assert tbl.schema() == { + "int8": "integer", + "int16": "integer", + "int32": "integer", + "int64": "integer", + "uint8": "integer", + "uint16": "integer", + "uint32": "integer", + "uint64": "integer", + "float32": "float", + "float64": "float", + } for k, values in ALL_INTEGERS_DATA.items(): v = tbl.view(filter=[[k, "==", values[0].as_py()]]) assert len(v.to_json()) == 1 @@ -481,17 +488,7 @@ def test_table_arrow_loads_arrow_from_df_with_nan(self): assert arrow_table["a"].null_count == 4 - # write arrow to stream - stream = pa.BufferOutputStream() - writer = pa.RecordBatchStreamWriter( - stream, 
arrow_table.schema - ) - writer.write_table(arrow_table) - writer.close() - arrow = stream.getvalue().to_pybytes() - - # load - tbl = Table(arrow) + tbl = Table(arrow_table) assert tbl.size() == 8 # check types diff --git a/rust/perspective-python/perspective/tests/table/test_update.py b/rust/perspective-python/perspective/tests/table/test_update.py index 1089846b25..50b63e11b3 100644 --- a/rust/perspective-python/perspective/tests/table/test_update.py +++ b/rust/perspective-python/perspective/tests/table/test_update.py @@ -32,16 +32,7 @@ def test_update_with_missing_or_null_values(self): arrow_table = pa.Table.from_pandas(data, preserve_index=False) - # write arrow to stream - stream = pa.BufferOutputStream() - writer = pa.RecordBatchStreamWriter( - stream, arrow_table.schema - ) - writer.write_table(arrow_table) - writer.close() - arrow = stream.getvalue().to_pybytes() - - tbl.update(arrow) + tbl.update(arrow_table) assert tbl.size() == 2 assert tbl.view().to_records() == [{"a": "1", "b": ""}, {"a": "3", "b": "4"}] diff --git a/rust/perspective-server/cpp/perspective/src/cpp/computed_expression.cpp b/rust/perspective-server/cpp/perspective/src/cpp/computed_expression.cpp index c96f7c7cfa..6477d629e7 100644 --- a/rust/perspective-server/cpp/perspective/src/cpp/computed_expression.cpp +++ b/rust/perspective-server/cpp/perspective/src/cpp/computed_expression.cpp @@ -34,6 +34,12 @@ computed_function::min_fn t_computed_expression_parser::MIN_FN = computed_function::max_fn t_computed_expression_parser::MAX_FN = computed_function::max_fn(); +computed_function::coalesce t_computed_expression_parser::COALESCE_FN = + computed_function::coalesce(); + +computed_function::contains t_computed_expression_parser::CONTAINS_FN = + computed_function::contains(); + computed_function::diff3 t_computed_expression_parser::diff3 = computed_function::diff3(); @@ -534,6 +540,12 @@ t_computed_function_store::register_computed_functions( sym_table.add_reserved_function( "max", 
t_computed_expression_parser::MAX_FN ); + sym_table.add_function( + "coalesce", t_computed_expression_parser::COALESCE_FN + ); + sym_table.add_function( + "contains", t_computed_expression_parser::CONTAINS_FN + ); sym_table.add_reserved_function( "diff3", t_computed_expression_parser::diff3 ); diff --git a/rust/perspective-server/cpp/perspective/src/cpp/computed_function.cpp b/rust/perspective-server/cpp/perspective/src/cpp/computed_function.cpp index 6fe390fe0c..a81f4593e4 100644 --- a/rust/perspective-server/cpp/perspective/src/cpp/computed_function.cpp +++ b/rust/perspective-server/cpp/perspective/src/cpp/computed_function.cpp @@ -485,6 +485,36 @@ match_all::operator()(t_parameter_list parameters) { return rval; } +contains::contains() : exprtk::igeneric_function("TS") {} + +contains::~contains() = default; + +t_tscalar +contains::operator()(t_parameter_list parameters) { + t_tscalar rval; + rval.clear(); + rval.m_type = DTYPE_BOOL; + + t_scalar_view str_view(parameters[0]); + t_string_view needle_view(parameters[1]); + + t_tscalar str = str_view(); + std::string needle = + std::string(needle_view.begin(), needle_view.end()); + + if (str.get_dtype() != DTYPE_STR || str.m_status == STATUS_CLEAR) { + rval.m_status = STATUS_CLEAR; + return rval; + } + + if (!str.is_valid()) { + return rval; + } + + rval.set(str.to_string().find(needle) != std::string::npos); + return rval; +} + search::search( t_expression_vocab& expression_vocab, t_regex_mapping& regex_mapping, @@ -1769,6 +1799,78 @@ max_fn::operator()(t_parameter_list parameters) { return rval; } +coalesce::coalesce() = default; + +coalesce::~coalesce() = default; + +t_tscalar +coalesce::operator()(t_parameter_list parameters) { + t_tscalar rval; + rval.clear(); + + if (parameters.size() == 0) { + rval.m_status = STATUS_CLEAR; + return rval; + } + + std::vector inputs; + inputs.resize(parameters.size()); + + // Pass 1: type-check all parameters and resolve the output dtype. 
+ // Loose matching: any combination of numeric types promotes to FLOAT64, + // matching the precedent set by min_fn / max_fn. Non-numeric types + // must share an exact dtype. + bool all_numeric = true; + t_dtype first_dtype = DTYPE_NONE; + + for (auto i = 0; i < parameters.size(); ++i) { + t_generic_type& gt = parameters[i]; + if (gt.type != t_generic_type::e_scalar) { + rval.m_status = STATUS_CLEAR; + return rval; + } + + t_scalar_view _temp(gt); + t_tscalar temp = _temp(); + inputs[i] = temp; + + auto dt = static_cast(temp.m_type); + bool numeric = is_numeric_type(dt); + + if (i == 0) { + first_dtype = dt; + all_numeric = numeric; + } else if (all_numeric && numeric) { + // both numeric so far - stays in promotion path + } else if (!all_numeric && dt == first_dtype) { + // exact-match path for non-numeric types + } else { + rval.m_status = STATUS_CLEAR; + return rval; + } + } + + rval.m_type = all_numeric ? DTYPE_FLOAT64 : first_dtype; + + // Pass 2: return the first valid, non-none scalar. During the type- + // validation pass all inputs are STATUS_INVALID sentinels, so this + // loop falls through and returns rval with the resolved dtype and + // STATUS_INVALID, which the validator accepts. 
+ for (auto i = 0; i < inputs.size(); ++i) { + const t_tscalar& val = inputs[i]; + if (val.is_valid() && !val.is_none()) { + if (all_numeric) { + rval.set(val.to_double()); + } else { + rval.set(val); + } + return rval; + } + } + + return rval; +} + diff3::diff3() : exprtk::igeneric_function("VVV") {} diff3::~diff3() = default; diff --git a/rust/perspective-server/cpp/perspective/src/cpp/server.cpp b/rust/perspective-server/cpp/perspective/src/cpp/server.cpp index eb81b97cc2..bfa159a90b 100644 --- a/rust/perspective-server/cpp/perspective/src/cpp/server.cpp +++ b/rust/perspective-server/cpp/perspective/src/cpp/server.cpp @@ -334,7 +334,7 @@ re_intern_strings(std::string&& expression) { static auto re_unintern_some_exprs(std::string&& expression) { static const RE2 interned_param( - "(?:match|match_all|search|indexof|replace|replace_all)\\(" + "(?:match|match_all|search|indexof|replace|replace_all|contains)\\(" "(?:.*?,\\s*(intern\\(('.*?')\\)))" ); static const RE2 intern_match("intern\\(('.*?')\\)"); diff --git a/rust/perspective-server/cpp/perspective/src/include/perspective/computed_expression.h b/rust/perspective-server/cpp/perspective/src/include/perspective/computed_expression.h index 36c2d3664e..4401449cd6 100644 --- a/rust/perspective-server/cpp/perspective/src/include/perspective/computed_expression.h +++ b/rust/perspective-server/cpp/perspective/src/include/perspective/computed_expression.h @@ -117,6 +117,8 @@ class PERSPECTIVE_EXPORT t_computed_expression_parser { static computed_function::inrange_fn INRANGE_FN; static computed_function::min_fn MIN_FN; static computed_function::max_fn MAX_FN; + static computed_function::coalesce COALESCE_FN; + static computed_function::contains CONTAINS_FN; static computed_function::diff3 diff3; static computed_function::norm3 norm3; static computed_function::cross_product3 cross_product3; diff --git a/rust/perspective-server/cpp/perspective/src/include/perspective/computed_function.h 
b/rust/perspective-server/cpp/perspective/src/include/perspective/computed_function.h index c92f14affa..74418dab45 100644 --- a/rust/perspective-server/cpp/perspective/src/include/perspective/computed_function.h +++ b/rust/perspective-server/cpp/perspective/src/include/perspective/computed_function.h @@ -128,7 +128,11 @@ namespace computed_function { * @brief Given a string column and a non-regex string literal, check * whether each row in the string column contains the string literal. */ - STRING_FUNCTION_HEADER(contains) + struct contains final : public exprtk::igeneric_function { + contains(); + ~contains(); + t_tscalar operator()(t_parameter_list parameters) override; + }; /** * @brief match(string, pattern) => True if the string or a substring @@ -323,6 +327,13 @@ namespace computed_function { */ FUNCTION_HEADER(max_fn) + /** + * @brief Return the first non-null argument. Variadic; all numeric + * inputs are promoted to FLOAT64. Non-numeric inputs must share an + * exact dtype. + */ + FUNCTION_HEADER(coalesce) + /** * @brief Get the cross product of two vec3s */