diff --git a/CHANGELOG.md b/CHANGELOG.md index 83fb8fc7..1550607d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.35.2] - 2026-03-02 +### Added +- `parse_statedump` parameter to `run_parser()` method for converting + statedump strings into structured JSON format +- CLI `--parse-statedump` flag for `secops parser run` command + ## [0.35.1] - 2026-02-23 ### Added - `as_list` parameter to `search_udm()` for returning events as a list instead of dictionary diff --git a/CLI.md b/CLI.md index eb9ec6f2..827a4814 100644 --- a/CLI.md +++ b/CLI.md @@ -616,8 +616,18 @@ secops parser run \ secops parser run \ --log-type OKTA \ --logs-file "./test.log" + +# Run parser with statedump for debugging (outputs readable parser state) +secops parser run \ + --log-type WINEVTLOG \ + --parser-code-file "./parser.conf" \ + --logs-file "./logs.txt" \ + --statedump-allowed \ + --parse-statedump ``` +The `--statedump-allowed` flag enables statedump output in the parser results, which shows the internal state of the parser during execution. The `--parse-statedump` flag converts the statedump string into a structured JSON format. 
+ The command validates: - Log type and parser code are provided - At least one log is provided diff --git a/README.md b/README.md index 26951a1c..b51ac5c7 100644 --- a/README.md +++ b/README.md @@ -1666,6 +1666,27 @@ if "runParserResults" in result: print(f" Parsed events: {parser_result['parsedEvents']}") if "errors" in parser_result: print(f" Errors: {parser_result['errors']}") + +# Run parser with statedump for debugging +# Statedump provides internal parser state useful for troubleshooting +result_with_statedump = chronicle.run_parser( + log_type=log_type, + parser_code=parser_text, + parser_extension_code=None, + logs=sample_logs, + statedump_allowed=True, # Enable statedump in parser output + parse_statedump=True # Parse statedump string into structured format +) + +# Check statedump results (useful for parser debugging) +if "runParserResults" in result_with_statedump: + for i, parser_result in enumerate(result_with_statedump["runParserResults"]): + if "statedumpResults" in parser_result: + for dump in parser_result["statedumpResults"]: + statedump = dump.get("statedumpResult", {}) + print(f"\nParser state for log {i+1}:") + print(f" Info: {statedump.get('info', '')}") + print(f" State: {statedump.get('state', {})}") ``` The `run_parser` function includes comprehensive validation: diff --git a/pyproject.toml b/pyproject.toml index 11d90056..32f4bba7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "secops" -version = "0.35.1" +version = "0.35.2" description = "Python SDK for wrapping the Google SecOps API for common use cases" readme = "README.md" requires-python = ">=3.10" diff --git a/src/secops/chronicle/client.py b/src/secops/chronicle/client.py index 2795f8bc..462ff2c8 100644 --- a/src/secops/chronicle/client.py +++ b/src/secops/chronicle/client.py @@ -2441,6 +2441,7 @@ def run_parser( parser_extension_code: str, logs: list, statedump_allowed: bool = False, + parse_statedump: bool = 
False, ): """Run parser against sample logs. @@ -2451,6 +2452,8 @@ def run_parser( parser_extension_code: Content of the parser extension logs: list of logs to test parser against statedump_allowed: Statedump filter is enabled or not for a config + parse_statedump: Whether to parse statedump results into + structured format. Returns: Dictionary containing the parser result @@ -2465,6 +2468,7 @@ def run_parser( parser_extension_code=parser_extension_code, logs=logs, statedump_allowed=statedump_allowed, + parse_statedump=parse_statedump, ) # Rule Set methods diff --git a/src/secops/chronicle/parser.py b/src/secops/chronicle/parser.py index b670ba40..e1c3488e 100644 --- a/src/secops/chronicle/parser.py +++ b/src/secops/chronicle/parser.py @@ -15,6 +15,7 @@ """Parser management functionality for Chronicle.""" import base64 +import json from typing import Any from secops.exceptions import APIError @@ -319,6 +320,7 @@ def run_parser( parser_extension_code: str | None, logs: list[str], statedump_allowed: bool = False, + parse_statedump: bool = False, ) -> dict[str, Any]: """Run parser against sample logs. @@ -329,6 +331,8 @@ def run_parser( parser_extension_code: Optional content of the parser extension logs: List of log strings to test parser against statedump_allowed: Whether statedump filter is enabled for the config + parse_statedump: Whether to parse statedump results into structured + format. Returns: Dictionary containing the parser evaluation results with structure: @@ -336,10 +340,13 @@ def run_parser( "runParserResults": [ { "parsedEvents": [...], - "errors": [...] + "errors": [...], + "statedumpResults": [...] (if statedump_allowed=True) } ] } + If parse_statedump is True, statedumpResult strings are converted + to structured objects. 
Raises: ValueError: If input parameters are invalid @@ -450,4 +457,35 @@ def run_parser( raise APIError(error_detail) - return response.json() + result = response.json() + + if parse_statedump and "runParserResults" in result: + for run_result in result["runParserResults"]: + if "statedumpResults" in run_result: + for statedump_item in run_result["statedumpResults"]: + if "statedumpResult" in statedump_item: + try: + dump_str = statedump_item["statedumpResult"] + if isinstance(dump_str, str): + stripped = dump_str.strip() + if ":" in stripped: + parts = stripped.split("\n", 1) + info_line = parts[0].strip() + if "Internal State" in info_line: + info = info_line + if len(parts) > 1: + state_json = parts[1].strip() + state = json.loads(state_json) + else: + state = {} + statedump_item["statedumpResult"] = { + "info": info, + "state": state, + } + except ( + ValueError, + json.JSONDecodeError, + ) as e: + print(f"Warning: Failed to parse statedump: {e}") + + return result diff --git a/src/secops/cli/commands/parser.py b/src/secops/cli/commands/parser.py index a9574bea..b6896170 100644 --- a/src/secops/cli/commands/parser.py +++ b/src/secops/cli/commands/parser.py @@ -223,6 +223,12 @@ def setup_parser_command(subparsers): action="store_true", help="Enable statedump filter for the parser configuration", ) + run_parser_sub.add_argument( + "--parse-statedump", + "--parse_statedump", + action="store_true", + help=("Parse statedump results into readable format"), + ) run_parser_sub.set_defaults(func=handle_parser_run_command) @@ -404,6 +410,7 @@ def handle_parser_run_command(args, chronicle): parser_extension_code, logs, args.statedump_allowed, + args.parse_statedump, ) output_formatter(result, args.output) diff --git a/tests/chronicle/test_parser.py b/tests/chronicle/test_parser.py index ee7b6307..2da74cc2 100644 --- a/tests/chronicle/test_parser.py +++ b/tests/chronicle/test_parser.py @@ -955,3 +955,176 @@ def test_run_parser_validation_non_string_log(chronicle_client): ) 
assert "All logs must be strings" in str(exc_info.value) assert "index 1" in str(exc_info.value) + + +def test_run_parser_with_statedump_parsing(chronicle_client, mock_response): + """Test run_parser with parse_statedump=True.""" + log_type = "WINEVTLOG" + parser_code = "filter {}" + logs = ["test log"] + + statedump_string = '\n\nInternal State (label=):\n{\n "key": "value"\n}' + + expected_result = { + "runParserResults": [ + { + "parsedEvents": {"events": []}, + "statedumpResults": [{"statedumpResult": statedump_string}], + } + ] + } + mock_response.json.return_value = expected_result + + with patch.object( + chronicle_client.session, "post", return_value=mock_response + ) as mock_post: + result = run_parser( + chronicle_client, + log_type=log_type, + parser_code=parser_code, + parser_extension_code="", + logs=logs, + statedump_allowed=True, + parse_statedump=True, + ) + + called_args = mock_post.call_args + request_body = called_args[1]["json"] + assert request_body["statedump_allowed"] is True + + assert "runParserResults" in result + assert len(result["runParserResults"]) == 1 + statedump_results = result["runParserResults"][0]["statedumpResults"] + assert len(statedump_results) == 1 + parsed_statedump = statedump_results[0]["statedumpResult"] + assert isinstance(parsed_statedump, dict) + assert "info" in parsed_statedump + assert "state" in parsed_statedump + assert parsed_statedump["info"] == "Internal State (label=):" + assert parsed_statedump["state"]["key"] == "value" + + +def test_run_parser_without_statedump_parsing(chronicle_client, mock_response): + """Test run_parser with parse_statedump=False (default).""" + log_type = "WINEVTLOG" + parser_code = "filter {}" + logs = ["test log"] + + statedump_string = '\n\nInternal State (label=):\n{\n "key": "value"\n}' + + expected_result = { + "runParserResults": [ + { + "parsedEvents": {"events": []}, + "statedumpResults": [{"statedumpResult": statedump_string}], + } + ] + } + mock_response.json.return_value = 
expected_result + + with patch.object( + chronicle_client.session, "post", return_value=mock_response + ): + result = run_parser( + chronicle_client, + log_type=log_type, + parser_code=parser_code, + parser_extension_code="", + logs=logs, + statedump_allowed=True, + parse_statedump=False, + ) + + assert "runParserResults" in result + statedump_results = result["runParserResults"][0]["statedumpResults"] + original_statedump = statedump_results[0]["statedumpResult"] + assert original_statedump == statedump_string + + +def test_run_parser_statedump_parsing_with_invalid_json( + chronicle_client, mock_response, capsys +): + """Test statedump parsing handles invalid JSON gracefully.""" + log_type = "WINEVTLOG" + parser_code = "filter {}" + logs = ["test log"] + + expected_result = { + "runParserResults": [ + { + "parsedEvents": {"events": []}, + "statedumpResults": [ + {"statedumpResult": "Internal State:\n{invalid json}"} + ], + } + ] + } + mock_response.json.return_value = expected_result + + with patch.object( + chronicle_client.session, "post", return_value=mock_response + ): + result = run_parser( + chronicle_client, + log_type=log_type, + parser_code=parser_code, + parser_extension_code="", + logs=logs, + statedump_allowed=True, + parse_statedump=True, + ) + + captured = capsys.readouterr() + assert "Warning: Failed to parse statedump" in captured.out + + assert "runParserResults" in result + statedump_results = result["runParserResults"][0]["statedumpResults"] + assert ( + statedump_results[0]["statedumpResult"] + == "Internal State:\n{invalid json}" + ) + + +def test_run_parser_statedump_parsing_multiple_results( + chronicle_client, mock_response +): + """Test statedump parsing with multiple statedump results.""" + log_type = "WINEVTLOG" + parser_code = "filter {}" + logs = ["test log 1", "test log 2"] + + statedump1 = '\n\nInternal State (label=):\n{\n "log": "1"\n}' + statedump2 = '\n\nInternal State (label=):\n{\n "log": "2"\n}' + + expected_result = { + 
"runParserResults": [ + { + "parsedEvents": {"events": []}, + "statedumpResults": [ + {"statedumpResult": statedump1}, + {"statedumpResult": statedump2}, + ], + } + ] + } + mock_response.json.return_value = expected_result + + with patch.object( + chronicle_client.session, "post", return_value=mock_response + ): + result = run_parser( + chronicle_client, + log_type=log_type, + parser_code=parser_code, + parser_extension_code="", + logs=logs, + statedump_allowed=True, + parse_statedump=True, + ) + + statedump_results = result["runParserResults"][0]["statedumpResults"] + assert len(statedump_results) == 2 + assert isinstance(statedump_results[0]["statedumpResult"], dict) + assert statedump_results[0]["statedumpResult"]["state"]["log"] == "1" + assert isinstance(statedump_results[1]["statedumpResult"], dict) + assert statedump_results[1]["statedumpResult"]["state"]["log"] == "2" diff --git a/tests/cli/test_integration.py b/tests/cli/test_integration.py index 6d9a823e..b925235c 100644 --- a/tests/cli/test_integration.py +++ b/tests/cli/test_integration.py @@ -748,6 +748,118 @@ def test_cli_parser_run_error_cases(cli_env, common_args): os.unlink(parser_file) +@pytest.mark.integration +def test_cli_parser_run_evaluation_with_parsed_statedump(cli_env, common_args): + """ + Test the 'parser run' command with parsed statedump output: reads the + parser code and logs from temporary files, enables statedump, and checks + that the statedump result comes back as a structured object. 
+ """ + test_log_type = f"RESERVED_LOG_TYPE_1" + + # Sample parser code (filter with mutate/grok stages and a statedump) + sample_parser_code = r""" + filter { + mutate { + replace => { + "event1.idm.read_only_udm.metadata.event_type" => "GENERIC_EVENT" + "event1.idm.read_only_udm.metadata.vendor_name" => "ACME Labs" + } + } + grok { + match => { + "message" => ["^(?P<_firstWord>[^\s]+)\s.*$"] + } + on_error => "_grok_message_failed" + } + if ![_grok_message_failed] { + mutate { + replace => { + "event1.idm.read_only_udm.metadata.description" => "%{_firstWord}" + } + } + } + mutate { + merge => { + "@output" => "event1" + } + } + statedump{} + } + """ + + # Sample log data for --logs-file + sample_logs_file_content = """ + {"appDisplayName":"Azure Active Directory PowerShell","appId":"1b730912-1644-4b74-9bfd-dac224a7b894","appliedConditionalAccessPolicies":[],"clientAppUsed":"Mobile Apps and Desktop clients","conditionalAccessStatus":"success","correlationId":"8bdadb11-5851-4ff2-ad57-799c0149f606","createdDateTime":"2025-06-15T04:31:56Z","deviceDetail":{"browser":"Rich Client 5.2.8.0","deviceId":"","displayName":"","isCompliant":false,"isManaged":false,"operatingSystem":"Windows 8","trustType":""},"id":"ba6e48d0-85e9-45b0-9ce4-83eb83432200","ipAddress":"79.116.213.193","isInteractive":true,"location":{"city":"Madrid","countryOrRegion":"ES","geoCoordinates":{"altitude":null,"latitude":40.416,"longitude":-3.703},"state":"Madrid"},"resourceDisplayName":"Windows Azure Active Directory","resourceId":"00000001-0000-0000-d000-000000000000","riskDetail":"none","riskEventTypes":[],"riskEventTypes_v2":[],"riskLevelAggregated":"none","riskLevelDuringSignIn":"none","riskState":"none","status":{"additionalDetails":null,"errorCode":0,"failureReason":"Other."},"userDisplayName":"Admin Read Only","userId":"6838ec00-f384-40d8-b288-989103aed42b","userPrincipalName":"reports@example.onmicrosoft.com"} + """ + + parser_code_file_path = None + logs_file_path = None + + try: + # Create temporary parser code file + with 
tempfile.NamedTemporaryFile( + suffix=".yara", mode="w+", delete=False + ) as temp_file: + temp_file.write(sample_parser_code) + parser_code_file_path = temp_file.name + + # Create temporary logs file + with tempfile.NamedTemporaryFile( + suffix=".log", mode="w+", delete=False + ) as temp_file: + temp_file.write(sample_logs_file_content) + logs_file_path = temp_file.name + + # --- Run the parser using --parser-code-file and --logs-file --- + run_cmd_file_input = ( + [ + "secops", # CLI entry point + ] + + common_args + + [ + "parser", + "run", + "--log-type", + test_log_type, + "--parser-code-file", + parser_code_file_path, + "--logs-file", + logs_file_path, + "--statedump-allowed", + "--parse_statedump", + ] + ) + + run_result_file_input = subprocess.run( + run_cmd_file_input, env=cli_env, capture_output=True, text=True + ) + + # Assert CLI command execution success + assert run_result_file_input.returncode == 0, ( + "Parser run with files failed: " + f"{run_result_file_input.stderr}\n{run_result_file_input.stdout}" + ) + + # Parse and assert the output + run_output_file_input = json.loads(run_result_file_input.stdout) + assert "parsedEvents" in run_output_file_input["runParserResults"][0] + + statedump_results = run_output_file_input["runParserResults"][0].get( + "statedumpResults" + ) + assert len(statedump_results) + assert statedump_results[0].get("statedumpResult") + assert statedump_results[0].get("statedumpResult").get("state") + + finally: + # Clean up temporary files regardless of test outcome + if parser_code_file_path and os.path.exists(parser_code_file_path): + os.unlink(parser_code_file_path) + if logs_file_path and os.path.exists(logs_file_path): + os.unlink(logs_file_path) + + @pytest.mark.integration def test_cli_rule_list(cli_env, common_args): """Test the rule list command."""