diff --git a/changes/149.added b/changes/149.added
new file mode 100644
index 0000000..308fdcd
--- /dev/null
+++ b/changes/149.added
@@ -0,0 +1 @@
+Add is-subset and is-subset-ci operator modes for list comparison.
\ No newline at end of file
diff --git a/docs/user/lib_getting_started.md b/docs/user/lib_getting_started.md
index d74ee7b..d3c9d38 100644
--- a/docs/user/lib_getting_started.md
+++ b/docs/user/lib_getting_started.md
@@ -53,7 +53,7 @@ Below are the names of checks provided by the library. These both describe the t
 - `tolerance`: the keys must match and the values can differ according to the 'tolerance' value provided
 - `parameter_match`: a reference key and value is provided and its presence (or absence) is checked in the provided object
 - `regex`: a reference regex pattern is provided and is used to find a match in the provided object
-- `operator`: similar to parameter match, but the reference includes several different possible operators: 'in', 'bool', 'string', and numerical comparison with 'int' and 'float' to check against
+- `operator`: similar to parameter match, but the reference includes several different possible operators: `in`, `bool`, `string`, `list`, and numerical comparison with `int` and `float` to check against
 
 `CheckTypes` are explained in more detail in the [architecture](architecture.md).
 
diff --git a/docs/user/lib_use_cases.md b/docs/user/lib_use_cases.md
index f413995..6deda51 100644
--- a/docs/user/lib_use_cases.md
+++ b/docs/user/lib_use_cases.md
@@ -474,6 +474,14 @@ The `operator` check is a collection of more specific checks divided into catego
 2. `not-contains`: determines if an element string value does not contain the provided test-string value.
    - `not-contains: "overlay"`: checks if "overlay" is present in given node or not.
 
+#### `list` Operators
+
+1. `is-subset`: Check if the value of a specified element is a subset of the provided reference list.
+   - `is-subset: ["A", "B", "C"]`: checks if the extracted list contains only values from the provided reference list.
+
+2. `is-subset-ci`: Check if the value of a specified element is a subset of the provided reference list using case-insensitive comparison.
+   - `is-subset-ci: ["A", "B", "C"]`: checks if the extracted list contains only values from the provided reference list, ignoring letter case.
+
 #### `int`, `float` Operators
 
 1. `is-gt`: Check if the value of a specified element is greater than a given numeric value.
@@ -613,6 +621,88 @@ Can you guess what would be the outcome for an `int`, `float` operator?
 ([], True)
 ```
 
+What about checking whether an extracted list is a subset of an allowed list?
+
+```python
+>>> data = [
+...     {
+...         "id": "DOMAIN1.COMPANY.COM",
+...         "include_trusted_domains": [
+...             "COMPANY.COM",
+...             "domain1.company.com",
+...             "domain2.company.COM",
+...             "domain3.company.com",
+...             "test.com",
+...         ],
+...     }
+... ]
+>>> path = "[*].[$id$,include_trusted_domains]"
+>>> value = extract_data_from_json(data, path)
+>>> value
+[{'DOMAIN1.COMPANY.COM': {'include_trusted_domains': ['COMPANY.COM',
+                                                      'domain1.company.com',
+                                                      'domain2.company.COM',
+                                                      'domain3.company.com',
+                                                      'test.com']}}]
+```
+
+Using the case-sensitive subset operator:
+
+```python
+>>> check_args = {
+...     "params": {
+...         "mode": "is-subset",
+...         "operator_data": [
+...             "COMPANY.COM",
+...             "domain1.company.com",
+...             "domain2.company.com",
+...             "domain3.company.com",
+...             "domain4.company.com",
+...             "domain5.company.com",
+...             "test.com",
+...             "test1.com",
+...             "test2.com",
+...         ],
+...     }
+... }
+>>> check = CheckType.create("operator")
+>>> result = check.evaluate(check_args, value)
+>>> result
+([{'DOMAIN1.COMPANY.COM': {'include_trusted_domains': ['COMPANY.COM',
+                                                       'domain1.company.com',
+                                                       'domain2.company.COM',
+                                                       'domain3.company.com',
+                                                       'test.com']}}], False)
+```
+
+The `is-subset` operator is case-sensitive, so "domain2.company.COM" does not match "domain2.company.com".
+
+Using the case-insensitive subset operator:
+
+```python
+>>> check_args = {
+...     "params": {
+...         "mode": "is-subset-ci",
+...         "operator_data": [
+...             "COMPANY.COM",
+...             "domain1.company.com",
+...             "domain2.company.com",
+...             "domain3.company.com",
+...             "domain4.company.com",
+...             "domain5.company.com",
+...             "test.com",
+...             "test1.com",
+...             "test2.com",
+...         ],
+...     }
+... }
+>>> result = check.evaluate(check_args, value)
+>>> result
+([], True)
+```
+
+These operators are useful when the extracted value itself is a list and must be validated against an allowed reference list.
+
 See `tests` folder in the repo for more examples.
 
 ## Putting a Result Back Together
diff --git a/jdiff/check_types.py b/jdiff/check_types.py
index 164159f..cb185eb 100644
--- a/jdiff/check_types.py
+++ b/jdiff/check_types.py
@@ -180,11 +180,13 @@ def _validate(params) -> None:  # type: ignore[override]
         bool_operators = ("all-same",)
         number_operators = ("is-gt", "is-lt", "is-ge", "is-le")
         string_operators = ("contains", "not-contains")
+        list_operators = ("is-subset", "is-subset-ci")
         valid_options = (
             in_operators,
             bool_operators,
             number_operators,
             string_operators,
+            list_operators,
         )
 
         # Validate "params" argument is not None.
@@ -245,6 +247,13 @@ def _validate(params) -> None:  # type: ignore[override]
                 raise ValueError(
                     f"check option all-same must have value of type bool. You have: {params_value} of type {type(params_value)}"
                 )
+        # "is-subset" and "is-subset-ci" require a list or tuple as reference data
+        if params_key in list_operators:
+            if not isinstance(params_value, (list, tuple)):
+                raise ValueError(
+                    f"check options {list_operators} must have value of type list or tuple. "
+                    f"You have: {params_value} of type {type(params_value)}."
+                )
 
     def evaluate(self, params: Any, value_to_compare: Any) -> Tuple[Dict, bool]:  # type: ignore[override]
         """Operator evaluator implementation."""
diff --git a/jdiff/operator.py b/jdiff/operator.py
index ffa3921..6256eb5 100644
--- a/jdiff/operator.py
+++ b/jdiff/operator.py
@@ -129,3 +129,40 @@ def in_range(self) -> Tuple[List, bool]:
     def not_in_range(self) -> Tuple[List, bool]:
         """Is not in range operator caller."""
         return self._loop_through_wrapper("not_in_range")
+
+    def _subset_failures(self, normalize) -> Tuple[List, bool]:
+        """Collect the items holding an extracted value that is not a subset of the reference data.
+
+        Args:
+            normalize: Callable applied to every element of both the reference data and the
+                extracted values before they are compared.
+
+        Returns:
+            Tuple of (failing items, check result). The result is True only when every extracted
+            value is a list or tuple whose elements are all found in the reference data.
+        """
+        # Membership is tested with "==" against a list rather than through set(), so unhashable
+        # elements (for example dicts nested in the extracted JSON) do not raise TypeError.
+        reference = [normalize(element) for element in self.reference_data]
+        failed = []
+
+        for item in self.value_to_compare:
+            extracted_values = [inner for value in item.values() for inner in value.values()]
+            # A value that is not a list/tuple can never be a subset, so it fails the check too.
+            # Each failing item is reported once, however many of its values fail.
+            if not all(
+                isinstance(extracted, (list, tuple))
+                and all(normalize(element) in reference for element in extracted)
+                for extracted in extracted_values
+            ):
+                failed.append(item)
+
+        return (failed, not failed)
+
+    def is_subset(self) -> Tuple[List, bool]:
+        """Check whether each extracted list is a subset of the reference list (case-sensitive)."""
+        return self._subset_failures(lambda element: element)
+
+    def is_subset_ci(self) -> Tuple[List, bool]:
+        """Check whether each extracted list is a subset of the reference list (case-insensitive)."""
+        return self._subset_failures(lambda element: str(element).casefold())
diff --git a/jdiff/utils/jmespath_parsers.py b/jdiff/utils/jmespath_parsers.py
index d9c8644..7459e4e 100644
--- a/jdiff/utils/jmespath_parsers.py
+++ b/jdiff/utils/jmespath_parsers.py
@@ -74,33 +74,37 @@ def jmespath_refkey_parser(path: str) -> str:
 
 
 def associate_key_of_my_value(paths: str, wanted_value: List) -> List:
-    """Associate each reference key (from: jmespath_refkey_parser) to every value found in output (from: jmespath_value_parser)."""
-    # global.peers.*.[is_enabled,is_up] / result.[*].state
+    """Associate extracted field names with the values returned by the parsed JMESPath value path."""
     find_the_key_of_my_values = paths.split(".")[-1]
 
-    # [is_enabled,is_up]
     if find_the_key_of_my_values.startswith("[") and find_the_key_of_my_values.endswith("]"):
-        # ['is_enabled', 'is_up']
         my_key_value_list = find_the_key_of_my_values.strip("[]").split(",")
-    # state
     else:
         my_key_value_list = [find_the_key_of_my_values]
 
     final_list = []
 
-    if not all(isinstance(item, list) for item in wanted_value) and len(my_key_value_list) == 1:
+    if len(my_key_value_list) == 1:
+        key_name = my_key_value_list[0]
+
         for item in wanted_value:
-            temp_dict = {my_key_value_list[0]: item}
-            final_list.append(temp_dict)
+            if not isinstance(item, list):
+                value = item
+            elif len(item) == 1 and not isinstance(item[0], list):
+                value = item[0]
+            else:
+                value = item
 
-    else:
-        for items in wanted_value:
-            if len(items) != len(my_key_value_list):
-                raise ValueError("Key's value len != from value len")
+            final_list.append({key_name: value})
+
+        return final_list
 
-            temp_dict = {my_key_value_list[my_index]: my_value for my_index, my_value in enumerate(items)}
+    for items in wanted_value:
+        if len(items) != len(my_key_value_list):
+            raise ValueError("Key's value len != from value len")
 
-            final_list.append(temp_dict)
+        temp_dict = {my_key_value_list[my_index]: my_value for my_index, my_value in enumerate(items)}
+        final_list.append(temp_dict)
 
     return final_list
 
diff --git a/tests/mock/api/trusted_domains.json b/tests/mock/api/trusted_domains.json
new file mode 100644
index 0000000..e781395
--- /dev/null
+++ b/tests/mock/api/trusted_domains.json
@@ -0,0 +1,29 @@
+[
+    {
+        "allocate_gids": true,
+        "allocate_uids": true,
+        "assume_default_domain": false,
+        "authentication": true,
+        "check_online_interval": 300,
+        "controller_time": 1769518575,
+        "create_home_directory": false,
+        "domain_offline_alerts": false,
+        "extra_expected_spns": [],
+        "findable_groups": [],
+        "findable_users": [],
+        "forest": "domain1.company.com",
+        "groupnet": "groupnet1",
+        "home_directory_template": "/ifs/zone1/cluster1/adm/%U",
+        "hostname": "cluster1.domain1.company.com",
+        "id": "DOMAIN1.COMPANY.COM",
+        "ignore_all_trusts": false,
+        "ignored_trusted_domains": [],
+        "include_trusted_domains": [
+            "COMPANY.COM",
+            "domain1.company.com",
+            "domain2.company.COM",
+            "domain3.company.com",
+            "test.com"
+        ]
+    }
+]
\ No newline at end of file
diff --git a/tests/test_get_value.py b/tests/test_get_value.py
index 6304b48..c185f28 100644
--- a/tests/test_get_value.py
+++ b/tests/test_get_value.py
@@ -139,3 +139,37 @@ def test_top_key_anchor(jmspath, expected_value):
     value = extract_data_from_json(data=data, path=jmspath)
 
     assert value == expected_value, ASSERT_FAIL_MESSAGE.format(output=value, expected_output=expected_value)
+
+
+def test_extract_data_from_json_with_ref_key_and_list_value():
+    """Verify that extract_data_from_json correctly handles ref-key paths when the extracted field value is a list."""
+    data = [
+        {
+            "id": "DOMAIN1.MYCOMPANY.COM",
+            "include_trusted_domains": [
+                "MYCOMPANY.COM",
+                "domain1.mycompany.com",
+                "domain2.mycompany.COM",
+                "domain3.mycompany.com",
+                "test_domain.com",
+            ],
+        }
+    ]
+
+    expected_value = [
+        {
+            "DOMAIN1.MYCOMPANY.COM": {
+                "include_trusted_domains": [
+                    "MYCOMPANY.COM",
+                    "domain1.mycompany.com",
+                    "domain2.mycompany.COM",
+                    "domain3.mycompany.com",
+                    "test_domain.com",
+                ]
+            }
+        }
+    ]
+
+    value = extract_data_from_json(data=data, path="[*].[$id$,include_trusted_domains]")
+
+    assert value == expected_value, ASSERT_FAIL_MESSAGE.format(output=value, expected_output=expected_value)
diff --git a/tests/test_operators.py b/tests/test_operators.py
index 60142fe..2252ef9 100644
--- a/tests/test_operators.py
+++ b/tests/test_operators.py
@@ -157,6 +157,65 @@
         False,
     ),
 )
+operator_is_subset = (
+    "trusted_domains.json",
+    "operator",
+    {
+        "params": {
+            "mode": "is-subset",
+            "operator_data": [
+                "COMPANY.COM",
+                "domain1.company.com",
+                "domain2.company.com",
+                "domain3.company.com",
+                "domain4.company.com",
+                "domain5.company.com",
+                "test.com",
+                "test1.com",
+                "test2.com",
+            ],
+        }
+    },
+    "[*].[$id$,include_trusted_domains]",
+    (
+        [
+            {
+                "DOMAIN1.COMPANY.COM": {
+                    "include_trusted_domains": [
+                        "COMPANY.COM",
+                        "domain1.company.com",
+                        "domain2.company.COM",
+                        "domain3.company.com",
+                        "test.com",
+                    ]
+                }
+            }
+        ],
+        False,
+    ),
+)
+operator_is_subset_ci = (
+    "trusted_domains.json",
+    "operator",
+    {
+        "params": {
+            "mode": "is-subset-ci",
+            "operator_data": [
+                "COMPANY.COM",
+                "domain1.company.com",
+                "domain2.company.com",
+                "domain3.company.com",
+                "domain4.company.com",
+                "domain5.company.com",
+                "test.com",
+                "test1.com",
+                "test2.com",
+            ],
+        }
+    },
+    "[*].[$id$,include_trusted_domains]",
+    ([], True),
+)
 
 operator_all_tests = [
     operator_all_same,
@@ -172,6 +231,8 @@
     operator_not_in,
     operator_in_range,
     operator_not_in_range,
+    operator_is_subset,
+    operator_is_subset_ci,
 ]
 
 
diff --git a/tests/test_validates.py b/tests/test_validates.py
index 0462f7f..6605bbb 100644
--- a/tests/test_validates.py
+++ b/tests/test_validates.py
@@ -67,7 +67,7 @@
 operator_params_wrong_operator = (
     "operator",
     {"params": {"mode": "random", "operator_data": [20, 40, 60]}},
-    "'params' value must be one of the following: ['is-in', 'not-in', 'in-range', 'not-in-range', 'all-same', 'is-gt', 'is-lt', 'is-ge', 'is-le', 'contains', 'not-contains']. You have: random",
+    "'params' value must be one of the following: ['is-in', 'not-in', 'in-range', 'not-in-range', 'all-same', 'is-gt', 'is-lt', 'is-ge', 'is-le', 'contains', 'not-contains', 'is-subset', 'is-subset-ci']. You have: random",
 )
 operator_params_in = (
     "operator",
@@ -99,6 +99,11 @@
     {"params": {"mode": "all-same", "operator_data": 1}},
     "check option all-same must have value of type bool. You have: 1 of type <class 'int'>",
 )
+operator_params_subset = (
+    "operator",
+    {"params": {"mode": "is-subset", "operator_data": "not-a-list"}},
+    "check options ('is-subset', 'is-subset-ci') must have value of type list or tuple. You have: not-a-list of type <class 'str'>.",
+)
 
 all_tests = [
     tolerance_wrong_argumet,
@@ -120,6 +125,7 @@
     operator_params_number,
     operator_params_contains,
     operator_params_bool,
+    operator_params_subset,
 ]
 
 