diff --git a/pkg/workflow/expression_extraction.go b/pkg/workflow/expression_extraction.go index 36dfe05cc1d..afab29dd0d9 100644 --- a/pkg/workflow/expression_extraction.go +++ b/pkg/workflow/expression_extraction.go @@ -19,10 +19,7 @@ var expressionExtractionLog = logger.New("workflow:expression_extraction") // Pre-compiled regexes for performance (avoid recompilation in hot paths) var ( - // expressionExtractionRegex matches GitHub Actions expressions: ${{ ... }} - // Uses (?s) flag for dotall mode, non-greedy matching - expressionExtractionRegex = regexp.MustCompile(`\$\{\{(.*?)\}\}`) - awContextExpressionRegex = regexp.MustCompile(`github\.aw\.context\.([a-zA-Z0-9_]+)([^a-zA-Z0-9_-]|$)`) + awContextExpressionRegex = regexp.MustCompile(`github\.aw\.context\.([a-zA-Z0-9_]+)([^a-zA-Z0-9_-]|$)`) ) // ExpressionMapping represents a mapping between a GitHub expression and its environment variable @@ -143,7 +140,7 @@ func (e *ExpressionExtractor) processMatch(originalExpr, rawContent string) { func (e *ExpressionExtractor) ExtractExpressions(markdown string) ([]*ExpressionMapping, error) { expressionExtractionLog.Printf("Extracting expressions from markdown: content_length=%d", len(markdown)) - matches := expressionExtractionRegex.FindAllStringSubmatch(markdown, -1) + matches := ExpressionPattern.FindAllStringSubmatch(markdown, -1) expressionExtractionLog.Printf("Found %d expression matches", len(matches)) for _, match := range matches { @@ -428,14 +425,6 @@ func (e *ExpressionExtractor) ReplaceExpressionsWithEnvVars(markdown string) str return result } -// awInputsExprRegex matches ${{ github.aw.inputs. }} expressions -var awInputsExprRegex = regexp.MustCompile(`\$\{\{\s*github\.aw\.inputs\.([a-zA-Z0-9_-]+)\s*\}\}`) - -// awImportInputsExprRegex matches ${{ github.aw.import-inputs. }} and -// ${{ github.aw.import-inputs.. }} expressions (import-schema form). -// Captures the full dotted path (e.g. "count" or "config.apiKey"). -var awImportInputsExprRegex = regexp.MustCompile(`\$\{\{\s*github\.aw\.import-inputs\.([a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)?)\s*\}\}`) - // applyWorkflowDispatchFallbacks enhances entity number expressions with an // "|| inputs.item_number" fallback when the workflow has a workflow_dispatch // trigger that includes the item_number input (generated by the label trigger @@ -499,9 +488,9 @@ func SubstituteImportInputs(content string, importInputs map[string]any) string } // Substitute ${{ github.aw.inputs. }} (legacy form) - result := awInputsExprRegex.ReplaceAllStringFunc(content, substituteFunc(awInputsExprRegex, "inputs")) + result := AWInputsExpressionPattern.ReplaceAllStringFunc(content, substituteFunc(AWInputsExpressionPattern, "inputs")) // Substitute ${{ github.aw.import-inputs. }} (import-schema form) - result = awImportInputsExprRegex.ReplaceAllStringFunc(result, substituteFunc(awImportInputsExprRegex, "import-inputs")) + result = AWImportInputsExpressionPattern.ReplaceAllStringFunc(result, substituteFunc(AWImportInputsExpressionPattern, "import-inputs")) return result } diff --git a/pkg/workflow/expression_safety_validation.go b/pkg/workflow/expression_safety_validation.go index f8487408552..62594050f2e 100644 --- a/pkg/workflow/expression_safety_validation.go +++ b/pkg/workflow/expression_safety_validation.go @@ -23,13 +23,6 @@ const maxFuzzyMatchSuggestions = 7 // Pre-compiled regexes for expression safety validation (performance optimization) var ( - expressionRegex = regexp.MustCompile(`(?s)\$\{\{(.*?)\}\}`) - needsStepsRegex = regexp.MustCompile(`^(needs|steps)\.[a-zA-Z0-9_-]+(\.[a-zA-Z0-9_-]+)*$`) - inputsRegex = regexp.MustCompile(`^github\.event\.inputs\.[a-zA-Z0-9_-]+$`) - workflowCallInputsRegex = regexp.MustCompile(`^inputs\.[a-zA-Z0-9_-]+$`) - awInputsRegex = regexp.MustCompile(`^github\.aw\.inputs\.[a-zA-Z0-9_-]+$`) - awImportInputsRegex = regexp.MustCompile(`^github\.aw\.import-inputs\.[a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)?$`) - envRegex = regexp.MustCompile(`^env\.[a-zA-Z0-9_-]+$`) // comparisonExpressionPattern matches a full comparison expression so both sides can be // validated recursively instead of allowing a safe-looking prefix to bypass validation. comparisonExpressionPattern = regexp.MustCompile(`^(.+?)\s*(?:==|!=|<|>|<=|>=)\s*(.+)$`) @@ -42,7 +35,7 @@ var ( func validateExpressionSafety(markdownContent string) error { expressionValidationLog.Print("Validating expression safety in markdown content") - matches := expressionRegex.FindAllStringSubmatch(markdownContent, -1) + matches := ExpressionPatternDotAll.FindAllStringSubmatch(markdownContent, -1) expressionValidationLog.Printf("Found %d expressions to validate", len(matches)) var unauthorizedExpressions []string @@ -67,12 +60,12 @@ func validateExpressionSafety(markdownContent string) error { // If we can parse it, validate each literal expression in the tree validationErr := VisitExpressionTree(parsed, func(expr *ExpressionNode) error { return validateSingleExpression(expr.Expression, ExpressionValidationOptions{ - NeedsStepsRe: needsStepsRegex, - InputsRe: inputsRegex, - WorkflowCallInputsRe: workflowCallInputsRegex, - AwInputsRe: awInputsRegex, - AwImportInputsRe: awImportInputsRegex, - EnvRe: envRegex, + NeedsStepsRe: NeedsStepsPattern, + InputsRe: InputsPattern, + WorkflowCallInputsRe: WorkflowCallInputsPattern, + AwInputsRe: AWInputsPattern, + AwImportInputsRe: AWImportInputsPattern, + EnvRe: EnvPattern, UnauthorizedExpressions: &unauthorizedExpressions, }) }) @@ -82,12 +75,12 @@ func validateExpressionSafety(markdownContent string) error { } else { // If parsing fails, fall back to validating the whole expression as a literal err := validateSingleExpression(expression, ExpressionValidationOptions{ - NeedsStepsRe: needsStepsRegex, - InputsRe: inputsRegex, - WorkflowCallInputsRe: workflowCallInputsRegex, - AwInputsRe: awInputsRegex, - AwImportInputsRe: awImportInputsRegex, - EnvRe: envRegex, + NeedsStepsRe: NeedsStepsPattern, + InputsRe: InputsPattern, + WorkflowCallInputsRe: WorkflowCallInputsPattern, + AwInputsRe: AWInputsPattern, + AwImportInputsRe: AWImportInputsPattern, + EnvRe: EnvPattern, UnauthorizedExpressions: &unauthorizedExpressions, }) if err != nil { diff --git a/pkg/workflow/expressions_benchmark_test.go b/pkg/workflow/expressions_benchmark_test.go index a1ec1d6582b..3fe0f4b6732 100644 --- a/pkg/workflow/expressions_benchmark_test.go +++ b/pkg/workflow/expressions_benchmark_test.go @@ -13,11 +13,11 @@ func BenchmarkValidateExpression(b *testing.B) { for b.Loop() { _ = validateSingleExpression(expression, ExpressionValidationOptions{ - NeedsStepsRe: needsStepsRegex, - InputsRe: inputsRegex, - WorkflowCallInputsRe: workflowCallInputsRegex, - AwInputsRe: awInputsRegex, - EnvRe: envRegex, + NeedsStepsRe: NeedsStepsPattern, + InputsRe: InputsPattern, + WorkflowCallInputsRe: WorkflowCallInputsPattern, + AwInputsRe: AWInputsPattern, + EnvRe: EnvPattern, UnauthorizedExpressions: &unauthorizedExprs, }) } @@ -30,11 +30,11 @@ func BenchmarkValidateExpression_Complex(b *testing.B) { for b.Loop() { _ = validateSingleExpression(expression, ExpressionValidationOptions{ - NeedsStepsRe: needsStepsRegex, - InputsRe: inputsRegex, - WorkflowCallInputsRe: workflowCallInputsRegex, - AwInputsRe: awInputsRegex, - EnvRe: envRegex, + NeedsStepsRe: NeedsStepsPattern, + InputsRe: InputsPattern, + WorkflowCallInputsRe: WorkflowCallInputsPattern, + AwInputsRe: AWInputsPattern, + EnvRe: EnvPattern, UnauthorizedExpressions: &unauthorizedExprs, }) } @@ -47,11 +47,11 @@ func BenchmarkValidateExpression_NeedsOutputs(b *testing.B) { for b.Loop() { _ = validateSingleExpression(expression, ExpressionValidationOptions{ - NeedsStepsRe: needsStepsRegex, - InputsRe: inputsRegex, - WorkflowCallInputsRe: workflowCallInputsRegex, - AwInputsRe: awInputsRegex, - EnvRe: envRegex, + NeedsStepsRe: NeedsStepsPattern, + InputsRe: InputsPattern, + WorkflowCallInputsRe: WorkflowCallInputsPattern, + AwInputsRe: AWInputsPattern, + EnvRe: EnvPattern, UnauthorizedExpressions: &unauthorizedExprs, }) } @@ -64,11 +64,11 @@ func BenchmarkValidateExpression_StepsOutputs(b *testing.B) { for b.Loop() { _ = validateSingleExpression(expression, ExpressionValidationOptions{ - NeedsStepsRe: needsStepsRegex, - InputsRe: inputsRegex, - WorkflowCallInputsRe: workflowCallInputsRegex, - AwInputsRe: awInputsRegex, - EnvRe: envRegex, + NeedsStepsRe: NeedsStepsPattern, + InputsRe: InputsPattern, + WorkflowCallInputsRe: WorkflowCallInputsPattern, + AwInputsRe: AWInputsPattern, + EnvRe: EnvPattern, UnauthorizedExpressions: &unauthorizedExprs, }) } diff --git a/pkg/workflow/secret_extraction.go b/pkg/workflow/secret_extraction.go index ca651c7d649..598069abd78 100644 --- a/pkg/workflow/secret_extraction.go +++ b/pkg/workflow/secret_extraction.go @@ -10,14 +10,8 @@ import ( var secretLog = logger.New("workflow:secret_extraction") -// Pre-compiled regex for secret extraction (performance optimization) -// Matches: ${{ secrets.SECRET_NAME }} or ${{ secrets.SECRET_NAME || 'default' }} -var secretExprPattern = regexp.MustCompile(`\$\{\{\s*secrets\.([A-Z_][A-Z0-9_]*)\s*(?:\|\|.*?)?\s*\}\}`) - // Pre-compiled regex patterns for ExtractSecretsFromValue (performance optimization) var ( - // secretsExprFindPattern matches all ${{ ... }} expressions in a value - secretsExprFindPattern = regexp.MustCompile(`\$\{\{[^}]+\}\}`) // secretsNamePattern extracts the secret variable name from an expression secretsNamePattern = regexp.MustCompile(`secrets\.([A-Z_][A-Z0-9_]*)`) ) @@ -34,7 +28,7 @@ type SecretExpression struct { // - "${{ secrets.DD_SITE || 'datadoghq.com' }}" -> "DD_SITE" // - "plain value" -> "" func ExtractSecretName(value string) string { - matches := secretExprPattern.FindStringSubmatch(value) + matches := SecretExpressionPattern.FindStringSubmatch(value) if len(matches) >= 2 { return matches[1] } @@ -55,7 +49,7 @@ func ExtractSecretsFromValue(value string) map[string]string { // Find all ${{ ... }} expressions in the value // Pattern matches from ${{ to }} allowing nested content - expressions := secretsExprFindPattern.FindAllString(value, -1) + expressions := InlineExpressionPattern.FindAllString(value, -1) // For each expression, check if it contains secrets.VARIABLE_NAME // This handles both simple cases like "${{ secrets.TOKEN }}" diff --git a/pkg/workflow/template_injection_validation.go b/pkg/workflow/template_injection_validation.go index 6ce09f47ddc..806fe713caa 100644 --- a/pkg/workflow/template_injection_validation.go +++ b/pkg/workflow/template_injection_validation.go @@ -56,13 +56,6 @@ var templateInjectionValidationLog = newValidationLogger("template_injection") // Pre-compiled regex patterns for template injection detection var ( - // inlineExpressionRegex matches GitHub Actions template expressions ${{ ... }} - inlineExpressionRegex = regexp.MustCompile(`\$\{\{[^}]+\}\}`) - - // unsafeContextRegex matches high-risk context expressions that could contain user input - // These patterns are particularly dangerous when used directly in shell commands - unsafeContextRegex = regexp.MustCompile(`\$\{\{\s*(github\.event\.|steps\.[^}]+\.outputs\.|inputs\.)[^}]+\}\}`) - // allowedRunScriptExpressionRegex matches trusted compiler-owned expressions that are // intentionally rendered in generated run scripts and are not user-controlled. allowedRunScriptExpressionRegex = regexp.MustCompile(`^\$\{\{\s*(env\.[^}]+|vars\.[^}]+|runner\.[^}]+|github\.(repository|run_id|workspace)|steps\.parse-guard-vars\.outputs\.(approval_labels|blocked_users|trusted_users)|job\.services\[[^]]+\]\.ports\[[^]]+\])\s*\}\}$`) @@ -73,7 +66,7 @@ var ( // Used by the compiler regression guardrail to detect expressions that should have // been rewritten to env variables. func hasAnyExpressionInRunContent(yamlContent string) bool { - return hasExpressionInRunContent(yamlContent, inlineExpressionRegex) + return hasExpressionInRunContent(yamlContent, InlineExpressionPattern) } func hasExpressionInRunContent(yamlContent string, expressionRegex *regexp.Regexp) bool { @@ -154,7 +147,7 @@ func validateNoTemplateInjectionFromParsed(workflow map[string]any) error { for _, runContent := range runBlocks { // Check if this run block contains inline expressions - if !inlineExpressionRegex.MatchString(runContent) { + if !InlineExpressionPattern.MatchString(runContent) { continue } @@ -164,11 +157,11 @@ func validateNoTemplateInjectionFromParsed(workflow map[string]any) error { contentWithoutHeredocs := removeHeredocContent(runContent) // Extract all inline expressions from this run block (excluding heredocs) - expressions := inlineExpressionRegex.FindAllString(contentWithoutHeredocs, -1) + expressions := InlineExpressionPattern.FindAllString(contentWithoutHeredocs, -1) // Check each expression for unsafe contexts for _, expr := range expressions { - if unsafeContextRegex.MatchString(expr) { + if UnsafeContextPattern.MatchString(expr) { // Found an unsafe pattern - extract a snippet for context snippet := extractRunSnippet(contentWithoutHeredocs, expr) violations = append(violations, TemplateInjectionViolation{ @@ -209,7 +202,7 @@ func validateNoGitHubExpressionsInRunScriptsFromParsed(workflow map[string]any) // Align with template-injection validation: heredoc bodies are written to files // and are not executed as shell commands, so they are excluded from scanning. contentWithoutHeredocs := removeHeredocContent(runContent) - expressions := inlineExpressionRegex.FindAllString(contentWithoutHeredocs, -1) + expressions := InlineExpressionPattern.FindAllString(contentWithoutHeredocs, -1) for _, expr := range expressions { if allowedRunScriptExpressionRegex.MatchString(expr) { continue diff --git a/pkg/workflow/template_injection_validation_test.go b/pkg/workflow/template_injection_validation_test.go index bb93ec41878..a581f18eb6d 100644 --- a/pkg/workflow/template_injection_validation_test.go +++ b/pkg/workflow/template_injection_validation_test.go @@ -14,7 +14,7 @@ import ( // validateNoTemplateInjection is a test helper that parses YAML and validates // it for template injection vulnerabilities using validateNoTemplateInjectionFromParsed. func validateNoTemplateInjection(yamlContent string) error { - if !unsafeContextRegex.MatchString(yamlContent) { + if !UnsafeContextPattern.MatchString(yamlContent) { return nil } var workflow map[string]any