Skip to content
19 changes: 4 additions & 15 deletions pkg/workflow/expression_extraction.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,7 @@ var expressionExtractionLog = logger.New("workflow:expression_extraction")

// Pre-compiled regexes for performance (avoid recompilation in hot paths)
var (
// expressionExtractionRegex matches GitHub Actions expressions: ${{ ... }}
// Uses (?s) flag for dotall mode, non-greedy matching
expressionExtractionRegex = regexp.MustCompile(`\$\{\{(.*?)\}\}`)
awContextExpressionRegex = regexp.MustCompile(`github\.aw\.context\.([a-zA-Z0-9_]+)([^a-zA-Z0-9_-]|$)`)
awContextExpressionRegex = regexp.MustCompile(`github\.aw\.context\.([a-zA-Z0-9_]+)([^a-zA-Z0-9_-]|$)`)
)

// ExpressionMapping represents a mapping between a GitHub expression and its environment variable
Expand Down Expand Up @@ -143,7 +140,7 @@ func (e *ExpressionExtractor) processMatch(originalExpr, rawContent string) {
func (e *ExpressionExtractor) ExtractExpressions(markdown string) ([]*ExpressionMapping, error) {
expressionExtractionLog.Printf("Extracting expressions from markdown: content_length=%d", len(markdown))

matches := expressionExtractionRegex.FindAllStringSubmatch(markdown, -1)
matches := ExpressionPattern.FindAllStringSubmatch(markdown, -1)
expressionExtractionLog.Printf("Found %d expression matches", len(matches))

for _, match := range matches {
Expand Down Expand Up @@ -428,14 +425,6 @@ func (e *ExpressionExtractor) ReplaceExpressionsWithEnvVars(markdown string) str
return result
}

// awInputsExprRegex matches ${{ github.aw.inputs.<key> }} expressions
var awInputsExprRegex = regexp.MustCompile(`\$\{\{\s*github\.aw\.inputs\.([a-zA-Z0-9_-]+)\s*\}\}`)

// awImportInputsExprRegex matches ${{ github.aw.import-inputs.<key> }} and
// ${{ github.aw.import-inputs.<key>.<subkey> }} expressions (import-schema form).
// Captures the full dotted path (e.g. "count" or "config.apiKey").
var awImportInputsExprRegex = regexp.MustCompile(`\$\{\{\s*github\.aw\.import-inputs\.([a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)?)\s*\}\}`)

// applyWorkflowDispatchFallbacks enhances entity number expressions with an
// "|| inputs.item_number" fallback when the workflow has a workflow_dispatch
// trigger that includes the item_number input (generated by the label trigger
Expand Down Expand Up @@ -499,9 +488,9 @@ func SubstituteImportInputs(content string, importInputs map[string]any) string
}

// Substitute ${{ github.aw.inputs.<key> }} (legacy form)
result := awInputsExprRegex.ReplaceAllStringFunc(content, substituteFunc(awInputsExprRegex, "inputs"))
result := AWInputsExpressionPattern.ReplaceAllStringFunc(content, substituteFunc(AWInputsExpressionPattern, "inputs"))
// Substitute ${{ github.aw.import-inputs.<key> }} (import-schema form)
result = awImportInputsExprRegex.ReplaceAllStringFunc(result, substituteFunc(awImportInputsExprRegex, "import-inputs"))
result = AWImportInputsExpressionPattern.ReplaceAllStringFunc(result, substituteFunc(AWImportInputsExpressionPattern, "import-inputs"))

return result
}
Expand Down
33 changes: 13 additions & 20 deletions pkg/workflow/expression_safety_validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,6 @@ const maxFuzzyMatchSuggestions = 7

// Pre-compiled regexes for expression safety validation (performance optimization)
var (
expressionRegex = regexp.MustCompile(`(?s)\$\{\{(.*?)\}\}`)
needsStepsRegex = regexp.MustCompile(`^(needs|steps)\.[a-zA-Z0-9_-]+(\.[a-zA-Z0-9_-]+)*$`)
inputsRegex = regexp.MustCompile(`^github\.event\.inputs\.[a-zA-Z0-9_-]+$`)
workflowCallInputsRegex = regexp.MustCompile(`^inputs\.[a-zA-Z0-9_-]+$`)
awInputsRegex = regexp.MustCompile(`^github\.aw\.inputs\.[a-zA-Z0-9_-]+$`)
awImportInputsRegex = regexp.MustCompile(`^github\.aw\.import-inputs\.[a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)?$`)
envRegex = regexp.MustCompile(`^env\.[a-zA-Z0-9_-]+$`)
// comparisonExpressionPattern matches a full comparison expression so both sides can be
// validated recursively instead of allowing a safe-looking prefix to bypass validation.
comparisonExpressionPattern = regexp.MustCompile(`^(.+?)\s*(?:==|!=|<|>|<=|>=)\s*(.+)$`)
Expand All @@ -42,7 +35,7 @@ var (
func validateExpressionSafety(markdownContent string) error {
expressionValidationLog.Print("Validating expression safety in markdown content")

matches := expressionRegex.FindAllStringSubmatch(markdownContent, -1)
matches := ExpressionPatternDotAll.FindAllStringSubmatch(markdownContent, -1)
expressionValidationLog.Printf("Found %d expressions to validate", len(matches))

var unauthorizedExpressions []string
Expand All @@ -67,12 +60,12 @@ func validateExpressionSafety(markdownContent string) error {
// If we can parse it, validate each literal expression in the tree
validationErr := VisitExpressionTree(parsed, func(expr *ExpressionNode) error {
return validateSingleExpression(expr.Expression, ExpressionValidationOptions{
NeedsStepsRe: needsStepsRegex,
InputsRe: inputsRegex,
WorkflowCallInputsRe: workflowCallInputsRegex,
AwInputsRe: awInputsRegex,
AwImportInputsRe: awImportInputsRegex,
EnvRe: envRegex,
NeedsStepsRe: NeedsStepsPattern,
InputsRe: InputsPattern,
WorkflowCallInputsRe: WorkflowCallInputsPattern,
AwInputsRe: AWInputsPattern,
AwImportInputsRe: AWImportInputsPattern,
EnvRe: EnvPattern,
UnauthorizedExpressions: &unauthorizedExpressions,
})
})
Expand All @@ -82,12 +75,12 @@ func validateExpressionSafety(markdownContent string) error {
} else {
// If parsing fails, fall back to validating the whole expression as a literal
err := validateSingleExpression(expression, ExpressionValidationOptions{
NeedsStepsRe: needsStepsRegex,
InputsRe: inputsRegex,
WorkflowCallInputsRe: workflowCallInputsRegex,
AwInputsRe: awInputsRegex,
AwImportInputsRe: awImportInputsRegex,
EnvRe: envRegex,
NeedsStepsRe: NeedsStepsPattern,
InputsRe: InputsPattern,
WorkflowCallInputsRe: WorkflowCallInputsPattern,
AwInputsRe: AWInputsPattern,
AwImportInputsRe: AWImportInputsPattern,
EnvRe: EnvPattern,
UnauthorizedExpressions: &unauthorizedExpressions,
})
if err != nil {
Expand Down
40 changes: 20 additions & 20 deletions pkg/workflow/expressions_benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ func BenchmarkValidateExpression(b *testing.B) {

for b.Loop() {
_ = validateSingleExpression(expression, ExpressionValidationOptions{
NeedsStepsRe: needsStepsRegex,
InputsRe: inputsRegex,
WorkflowCallInputsRe: workflowCallInputsRegex,
AwInputsRe: awInputsRegex,
EnvRe: envRegex,
NeedsStepsRe: NeedsStepsPattern,
InputsRe: InputsPattern,
WorkflowCallInputsRe: WorkflowCallInputsPattern,
AwInputsRe: AWInputsPattern,
EnvRe: EnvPattern,
UnauthorizedExpressions: &unauthorizedExprs,
})
}
Expand All @@ -30,11 +30,11 @@ func BenchmarkValidateExpression_Complex(b *testing.B) {

for b.Loop() {
_ = validateSingleExpression(expression, ExpressionValidationOptions{
NeedsStepsRe: needsStepsRegex,
InputsRe: inputsRegex,
WorkflowCallInputsRe: workflowCallInputsRegex,
AwInputsRe: awInputsRegex,
EnvRe: envRegex,
NeedsStepsRe: NeedsStepsPattern,
InputsRe: InputsPattern,
WorkflowCallInputsRe: WorkflowCallInputsPattern,
AwInputsRe: AWInputsPattern,
EnvRe: EnvPattern,
UnauthorizedExpressions: &unauthorizedExprs,
})
}
Expand All @@ -47,11 +47,11 @@ func BenchmarkValidateExpression_NeedsOutputs(b *testing.B) {

for b.Loop() {
_ = validateSingleExpression(expression, ExpressionValidationOptions{
NeedsStepsRe: needsStepsRegex,
InputsRe: inputsRegex,
WorkflowCallInputsRe: workflowCallInputsRegex,
AwInputsRe: awInputsRegex,
EnvRe: envRegex,
NeedsStepsRe: NeedsStepsPattern,
InputsRe: InputsPattern,
WorkflowCallInputsRe: WorkflowCallInputsPattern,
AwInputsRe: AWInputsPattern,
EnvRe: EnvPattern,
UnauthorizedExpressions: &unauthorizedExprs,
})
}
Expand All @@ -64,11 +64,11 @@ func BenchmarkValidateExpression_StepsOutputs(b *testing.B) {

for b.Loop() {
_ = validateSingleExpression(expression, ExpressionValidationOptions{
NeedsStepsRe: needsStepsRegex,
InputsRe: inputsRegex,
WorkflowCallInputsRe: workflowCallInputsRegex,
AwInputsRe: awInputsRegex,
EnvRe: envRegex,
NeedsStepsRe: NeedsStepsPattern,
InputsRe: InputsPattern,
WorkflowCallInputsRe: WorkflowCallInputsPattern,
AwInputsRe: AWInputsPattern,
EnvRe: EnvPattern,
UnauthorizedExpressions: &unauthorizedExprs,
})
}
Expand Down
10 changes: 2 additions & 8 deletions pkg/workflow/secret_extraction.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,8 @@ import (

var secretLog = logger.New("workflow:secret_extraction")

// Pre-compiled regex for secret extraction (performance optimization)
// Matches: ${{ secrets.SECRET_NAME }} or ${{ secrets.SECRET_NAME || 'default' }}
var secretExprPattern = regexp.MustCompile(`\$\{\{\s*secrets\.([A-Z_][A-Z0-9_]*)\s*(?:\|\|.*?)?\s*\}\}`)

// Pre-compiled regex patterns for ExtractSecretsFromValue (performance optimization)
var (
// secretsExprFindPattern matches all ${{ ... }} expressions in a value
secretsExprFindPattern = regexp.MustCompile(`\$\{\{[^}]+\}\}`)
// secretsNamePattern extracts the secret variable name from an expression
secretsNamePattern = regexp.MustCompile(`secrets\.([A-Z_][A-Z0-9_]*)`)
)
Expand All @@ -34,7 +28,7 @@ type SecretExpression struct {
// - "${{ secrets.DD_SITE || 'datadoghq.com' }}" -> "DD_SITE"
// - "plain value" -> ""
func ExtractSecretName(value string) string {
matches := secretExprPattern.FindStringSubmatch(value)
matches := SecretExpressionPattern.FindStringSubmatch(value)
if len(matches) >= 2 {
return matches[1]
}
Expand All @@ -55,7 +49,7 @@ func ExtractSecretsFromValue(value string) map[string]string {

// Find all ${{ ... }} expressions in the value
// Pattern matches from ${{ to }} allowing nested content
expressions := secretsExprFindPattern.FindAllString(value, -1)
expressions := InlineExpressionPattern.FindAllString(value, -1)

// For each expression, check if it contains secrets.VARIABLE_NAME
// This handles both simple cases like "${{ secrets.TOKEN }}"
Expand Down
17 changes: 5 additions & 12 deletions pkg/workflow/template_injection_validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,6 @@ var templateInjectionValidationLog = newValidationLogger("template_injection")

// Pre-compiled regex patterns for template injection detection
var (
// inlineExpressionRegex matches GitHub Actions template expressions ${{ ... }}
inlineExpressionRegex = regexp.MustCompile(`\$\{\{[^}]+\}\}`)

// unsafeContextRegex matches high-risk context expressions that could contain user input
// These patterns are particularly dangerous when used directly in shell commands
unsafeContextRegex = regexp.MustCompile(`\$\{\{\s*(github\.event\.|steps\.[^}]+\.outputs\.|inputs\.)[^}]+\}\}`)

// allowedRunScriptExpressionRegex matches trusted compiler-owned expressions that are
// intentionally rendered in generated run scripts and are not user-controlled.
allowedRunScriptExpressionRegex = regexp.MustCompile(`^\$\{\{\s*(env\.[^}]+|vars\.[^}]+|runner\.[^}]+|github\.(repository|run_id|workspace)|steps\.parse-guard-vars\.outputs\.(approval_labels|blocked_users|trusted_users)|job\.services\[[^]]+\]\.ports\[[^]]+\])\s*\}\}$`)
Expand All @@ -73,7 +66,7 @@ var (
// Used by the compiler regression guardrail to detect expressions that should have
// been rewritten to env variables.
func hasAnyExpressionInRunContent(yamlContent string) bool {
return hasExpressionInRunContent(yamlContent, inlineExpressionRegex)
return hasExpressionInRunContent(yamlContent, InlineExpressionPattern)
}

func hasExpressionInRunContent(yamlContent string, expressionRegex *regexp.Regexp) bool {
Expand Down Expand Up @@ -154,7 +147,7 @@ func validateNoTemplateInjectionFromParsed(workflow map[string]any) error {

for _, runContent := range runBlocks {
// Check if this run block contains inline expressions
if !inlineExpressionRegex.MatchString(runContent) {
if !InlineExpressionPattern.MatchString(runContent) {
continue
}

Expand All @@ -164,11 +157,11 @@ func validateNoTemplateInjectionFromParsed(workflow map[string]any) error {
contentWithoutHeredocs := removeHeredocContent(runContent)

// Extract all inline expressions from this run block (excluding heredocs)
expressions := inlineExpressionRegex.FindAllString(contentWithoutHeredocs, -1)
expressions := InlineExpressionPattern.FindAllString(contentWithoutHeredocs, -1)

// Check each expression for unsafe contexts
for _, expr := range expressions {
if unsafeContextRegex.MatchString(expr) {
if UnsafeContextPattern.MatchString(expr) {
// Found an unsafe pattern - extract a snippet for context
snippet := extractRunSnippet(contentWithoutHeredocs, expr)
violations = append(violations, TemplateInjectionViolation{
Expand Down Expand Up @@ -209,7 +202,7 @@ func validateNoGitHubExpressionsInRunScriptsFromParsed(workflow map[string]any)
// Align with template-injection validation: heredoc bodies are written to files
// and are not executed as shell commands, so they are excluded from scanning.
contentWithoutHeredocs := removeHeredocContent(runContent)
expressions := inlineExpressionRegex.FindAllString(contentWithoutHeredocs, -1)
expressions := InlineExpressionPattern.FindAllString(contentWithoutHeredocs, -1)
for _, expr := range expressions {
if allowedRunScriptExpressionRegex.MatchString(expr) {
continue
Expand Down
2 changes: 1 addition & 1 deletion pkg/workflow/template_injection_validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import (
// validateNoTemplateInjection is a test helper that parses YAML and validates
// it for template injection vulnerabilities using validateNoTemplateInjectionFromParsed.
func validateNoTemplateInjection(yamlContent string) error {
if !unsafeContextRegex.MatchString(yamlContent) {
if !UnsafeContextPattern.MatchString(yamlContent) {
return nil
}
var workflow map[string]any
Expand Down
Loading