Skip to content

Commit 877ca91

Browse files
committed
Add --match-query
1 parent 33ec1d3 commit 877ca91

File tree

5 files changed

+281
-8
lines changed

5 files changed

+281
-8
lines changed

cmd/root.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ func InitializeFlags() {
4646
rootCmd.PersistentFlags().BoolVar(&app.GetFlags().NoKeywords, "no-keywords", false, "Don't search for built-in keywords")
4747
rootCmd.PersistentFlags().BoolVar(&app.GetFlags().ManyResults, "many-results", false, "Search >100 pages with filtering hack")
4848
rootCmd.PersistentFlags().BoolVar(&app.GetFlags().AllResults, "all-results", false, "Print all results, even if they do not contain secrets")
49+
rootCmd.PersistentFlags().BoolVar(&app.GetFlags().MatchQuery, "match-query", false, "Match on the user's query text (removes qualifiers like repo:, language:)")
4950
rootCmd.PersistentFlags().BoolVar(&app.GetFlags().JsonOutput, "json", false, "Print results in JSON format")
5051
rootCmd.PersistentFlags().BoolVar(&app.GetFlags().FastMode, "fast", false, "Skip file grepping and only return search preview")
5152
rootCmd.PersistentFlags().IntVar(&app.GetFlags().Threads, "threads", 20, "Threads to dig with")

internal/app/keyword_scan.go

Lines changed: 259 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,265 @@ func ScanAndPrintResult(client *http.Client, repo RepoSearchResult) {
7575
color.New(color.Faint).Println("[" + repo.File + "]")
7676
color.New(color.Faint).Println(repo.Contents)
7777
}
78+
} else if GetFlags().MatchQuery {
79+
// Handle match-query mode - still perform regex processing but also return query context
80+
// Get pointer matches using the same regex processing as normal mode
81+
matches, score := GetMatchesForString(repo.Contents, repo, true)
82+
83+
// Process potential additional matches from digging
84+
if (GetFlags().DigCommits || GetFlags().DigRepo) && RepoIsUnpopular(client, repo) && score > -1 {
85+
// Lock the map for thread-safe access
86+
mapMutex.Lock()
87+
repoAlreadyScanned := scannedRepos[repo.Repo]
88+
if !repoAlreadyScanned {
89+
scannedRepos[repo.Repo] = true
90+
}
91+
mapMutex.Unlock()
92+
93+
if !repoAlreadyScanned {
94+
regex := regexp.MustCompile("(?i)(alexa|urls|adblock|domain|dns|top1000|top\\-1000|httparchive" +
95+
"|blacklist|hosts|ads|whitelist|crunchbase|tweets|tld|hosts\\.txt" +
96+
"|host\\.txt|aquatone|recon\\-ng|hackerone|bugcrowd|xtreme|list|tracking|malicious|ipv(4|6)|host\\.txt)")
97+
fileNameMatches := regex.FindAllString(repo.Repo, -1)
98+
if len(fileNameMatches) == 0 {
99+
// Get additional matches from Dig function
100+
dig_matches := Dig(repo)
101+
for _, match := range dig_matches {
102+
// Add the dig-files attribute directly to the pointer match
103+
match.Attributes = append(match.Attributes, "dig-files")
104+
105+
// Add to matches - no need to copy since Dig now returns []*Match
106+
matches = append(matches, match)
107+
}
108+
}
109+
}
110+
}
111+
112+
// Fetch GitHub API info about the repo
113+
token := GetFlags().GithubAccessToken
114+
client := github.NewClient(nil).WithAuthToken(token)
115+
if client != nil {
116+
owner := strings.Split(repo.Repo, "/")[0]
117+
repoName := strings.Split(repo.Repo, "/")[1]
118+
TrackAPIRequest("ListCommits", fmt.Sprintf("Owner: %s, Repo: %s, Path: %s", owner, repoName, repo.File))
119+
commits, _, err := client.Repositories.ListCommits(context.Background(), owner, repoName, &github.CommitsListOptions{
120+
Path: repo.File,
121+
})
122+
if err != nil {
123+
fmt.Println(err)
124+
repo.SourceFileLastUpdated = ""
125+
} else {
126+
repo.SourceFileLastUpdated = commits[0].Commit.Author.Date.String()
127+
repo.SourceFileLastAuthorEmail = *commits[0].Commit.Author.Email
128+
}
129+
}
130+
131+
resultRepoURL := GetRepoURLForSearchResult(repo)
132+
133+
// If we found regex matches, process them normally but add query context
134+
if len(matches) > 0 {
135+
i := 0
136+
for _, result := range matches {
137+
// Create the result payload
138+
resultPayload := map[string]interface{}{
139+
"repo": resultRepoURL,
140+
"context": result.Line.Text,
141+
"match": result.Line.Text[result.Line.MatchIndex:result.Line.MatchEndIndex],
142+
"attributes": append(result.Attributes, "query: "+repo.Query), // Add query context to attributes
143+
"file_last_updated": repo.SourceFileLastUpdated,
144+
"file_last_author": repo.SourceFileLastAuthorEmail,
145+
"url": GetResultLink(repo, result),
146+
}
147+
148+
// For dug matches, update the file information while maintaining the structure
149+
if len(result.Attributes) > 0 && result.Attributes[0] == "dig-files" {
150+
resultPayload["file"] = result.File
151+
// Extract the base URL and commit hash from the original URL
152+
baseURL := strings.Split(repo.URL, "/blob/")[0]
153+
commitHash := strings.Split(repo.URL, "/blob/")[1]
154+
commitHash = strings.Split(commitHash, "/")[0]
155+
// Construct new URL with the file path from result.File
156+
resultPayload["url"] = fmt.Sprintf("%s/blob/%s/%s", baseURL, commitHash, result.File)
157+
}
158+
159+
// Use mutex to protect access to uniqueMatches map
160+
matchKey := fmt.Sprintf("%s|%s", resultPayload["match"], resultRepoURL)
161+
// For dig-files matches, include the file path in the deduplication key
162+
if len(result.Attributes) > 0 && result.Attributes[0] == "dig-files" {
163+
matchKey = fmt.Sprintf("%s|%s|%s", resultPayload["match"], resultRepoURL, result.File)
164+
}
165+
mapMutex.Lock()
166+
isDuplicate := uniqueMatches[matchKey]
167+
if !isDuplicate {
168+
uniqueMatches[matchKey] = true
169+
}
170+
mapMutex.Unlock()
171+
172+
if isDuplicate {
173+
continue
174+
}
175+
176+
if i == 0 {
177+
if !GetFlags().ResultsOnly && !GetFlags().JsonOutput {
178+
color.Green("[" + resultRepoURL + "]")
179+
}
180+
}
181+
i += 1
182+
if GetFlags().ResultsOnly {
183+
fmt.Println(result.Text)
184+
} else {
185+
if GetFlags().JsonOutput {
186+
a, _ := json.Marshal(resultPayload)
187+
fmt.Println(string(a))
188+
} else {
189+
PrintContextLine(result.Line)
190+
PrintPatternLine(result)
191+
PrintAttributes(result)
192+
// Always print the file path
193+
if len(result.Attributes) > 0 && result.Attributes[0] == "dig-files" {
194+
color.New(color.Faint).Println("file: " + result.File)
195+
// Construct URL for dig-files matches
196+
baseURL := strings.Split(repo.URL, "/blob/")[0]
197+
commitHash := strings.Split(repo.URL, "/blob/")[1]
198+
commitHash = strings.Split(commitHash, "/")[0]
199+
digURL := fmt.Sprintf("%s/blob/%s/%s", baseURL, commitHash, result.File)
200+
color.New(color.Faint).Println(digURL)
201+
} else {
202+
color.New(color.Faint).Println("file: " + repo.File)
203+
color.New(color.Faint).Println(GetResultLink(repo, result))
204+
}
205+
// Add query context to output
206+
color.New(color.Faint).Println("query: " + repo.Query)
207+
}
208+
}
209+
if GetFlags().Dashboard && GetFlags().InsertKey != "" {
210+
resultJSON, err := json.Marshal(resultPayload)
211+
if err == nil {
212+
searchID := GetFlags().SearchID
213+
if searchID != "" {
214+
if GetFlags().Trufflehog {
215+
SendMessageToWebSocket(fmt.Sprintf(`{"event": "search_result", "insertToken": "%s", "searchID": "%s", "result": %s}`, GetFlags().InsertKey, searchID, string(resultJSON)))
216+
} else {
217+
escapedQuery, _ := json.Marshal(repo.Query)
218+
// For dig-files matches, ensure the file path and URL are correctly set
219+
if len(result.Attributes) > 0 && result.Attributes[0] == "dig-files" {
220+
resultPayload["file"] = result.File
221+
baseURL := strings.Split(repo.URL, "/blob/")[0]
222+
commitHash := strings.Split(repo.URL, "/blob/")[1]
223+
commitHash = strings.Split(commitHash, "/")[0]
224+
resultPayload["url"] = fmt.Sprintf("%s/blob/%s/%s", baseURL, commitHash, result.File)
225+
resultJSON, _ = json.Marshal(resultPayload)
226+
}
227+
SendMessageToWebSocket(fmt.Sprintf(`{"event": "search_result", "insertToken": "%s", "searchID": "%s", "result": %s, "search_term": %s}`, GetFlags().InsertKey, searchID, string(resultJSON), string(escapedQuery)))
228+
}
229+
} else {
230+
if GetFlags().Trufflehog {
231+
SendMessageToWebSocket(fmt.Sprintf(`{"event": "search_result", "insertToken": "%s", "result": %s}`, GetFlags().InsertKey, string(resultJSON)))
232+
} else {
233+
escapedQuery, _ := json.Marshal(repo.Query)
234+
// For dig-files matches, ensure the file path and URL are correctly set
235+
if len(result.Attributes) > 0 && result.Attributes[0] == "dig-files" {
236+
resultPayload["file"] = result.File
237+
baseURL := strings.Split(repo.URL, "/blob/")[0]
238+
commitHash := strings.Split(repo.URL, "/blob/")[1]
239+
commitHash = strings.Split(commitHash, "/")[0]
240+
resultPayload["url"] = fmt.Sprintf("%s/blob/%s/%s", baseURL, commitHash, result.File)
241+
resultJSON, _ = json.Marshal(resultPayload)
242+
}
243+
SendMessageToWebSocket(fmt.Sprintf(`{"event": "search_result", "insertToken": "%s", "result": %s, "search_term": %s}`, GetFlags().InsertKey, string(resultJSON), string(escapedQuery)))
244+
}
245+
}
246+
} else {
247+
color.Red("Error marshalling result to JSON: %v", err)
248+
}
249+
}
250+
}
251+
if GetFlags().Debug {
252+
fmt.Println("Finished scanning " + repo.Repo + "...")
253+
}
254+
255+
// Clean up the matches by returning them to the pool
256+
PutMatches(matches)
257+
} else {
258+
searchContext := ""
259+
matchText := ""
260+
261+
// Use TextMatches from GitHub API if available for better context
262+
if len(repo.TextMatches) > 0 {
263+
// Use the first text match for context and match text
264+
textMatch := repo.TextMatches[0]
265+
searchContext = textMatch.GetFragment()
266+
// Get the first match text from the matches array
267+
if len(textMatch.Matches) > 0 {
268+
matchText = textMatch.Matches[0].GetText()
269+
} else {
270+
matchText = fmt.Sprintf("Search query match: %s", repo.Query)
271+
}
272+
} else {
273+
// Fallback to file content if no TextMatches available
274+
if len(repo.Contents) > 200 {
275+
// If file is large, show only the first 200 bytes as context
276+
searchContext = fmt.Sprintf("%s...", repo.Contents[:200])
277+
} else {
278+
// If file is small, show full content
279+
searchContext = fmt.Sprintf("%s", repo.Contents)
280+
}
281+
matchText = fmt.Sprintf("Search query match: %s", repo.Query)
282+
}
283+
284+
resultPayload := map[string]interface{}{
285+
"repo": resultRepoURL,
286+
"context": searchContext,
287+
"match": matchText,
288+
"attributes": []string{"Initial Query Match", "query: " + repo.Query},
289+
"file_last_updated": repo.SourceFileLastUpdated,
290+
"file_last_author": repo.SourceFileLastAuthorEmail,
291+
"url": repo.URL,
292+
}
293+
294+
if GetFlags().JsonOutput {
295+
a, _ := json.Marshal(resultPayload)
296+
fmt.Println(string(a))
297+
} else {
298+
if !GetFlags().ResultsOnly {
299+
color.Green("[" + resultRepoURL + "]")
300+
}
301+
if GetFlags().ResultsOnly {
302+
// Show the matched text from TextMatches if available, otherwise show file content
303+
if len(repo.TextMatches) > 0 && len(repo.TextMatches[0].Matches) > 0 {
304+
fmt.Println(repo.TextMatches[0].Matches[0].GetText())
305+
} else {
306+
fmt.Println(repo.Contents)
307+
}
308+
} else {
309+
color.New(color.Faint).Println("file: " + repo.File)
310+
color.New(color.Faint).Println("query: " + repo.Query)
311+
// Show the fragment from TextMatches if available, otherwise show file content
312+
if len(repo.TextMatches) > 0 {
313+
color.New(color.Faint).Println(repo.TextMatches[0].GetFragment())
314+
} else {
315+
color.New(color.Faint).Println(repo.Contents)
316+
}
317+
}
318+
}
319+
320+
// Handle dashboard mode if enabled
321+
if GetFlags().Dashboard && GetFlags().InsertKey != "" {
322+
resultJSON, err := json.Marshal(resultPayload)
323+
if err == nil {
324+
searchID := GetFlags().SearchID
325+
if searchID != "" {
326+
escapedQuery, _ := json.Marshal(repo.Query)
327+
SendMessageToWebSocket(fmt.Sprintf(`{"event": "search_result", "insertToken": "%s", "searchID": "%s", "result": %s, "search_term": %s}`, GetFlags().InsertKey, searchID, string(resultJSON), string(escapedQuery)))
328+
} else {
329+
escapedQuery, _ := json.Marshal(repo.Query)
330+
SendMessageToWebSocket(fmt.Sprintf(`{"event": "search_result", "insertToken": "%s", "result": %s, "search_term": %s}`, GetFlags().InsertKey, string(resultJSON), string(escapedQuery)))
331+
}
332+
} else {
333+
color.Red("Error marshalling result to JSON: %v", err)
334+
}
335+
}
336+
}
78337
} else {
79338
// Get pointer matches
80339
matches, score := GetMatchesForString(repo.Contents, repo, true)

internal/app/options.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ type Flags struct {
1616
NoFiles bool
1717
NoKeywords bool
1818
AllResults bool
19+
MatchQuery bool
1920
FastMode bool
2021
Threads int
2122
Debug bool

internal/app/search_api.go

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,11 @@ func SearchWithAPI(queries []string) {
6868
},
6969
}
7070

71+
// Enable text match metadata when in match-query mode
72+
if GetFlags().MatchQuery {
73+
options.TextMatch = true
74+
}
75+
7176
http_client := http.Client{}
7277
rt := WithHeader(http_client.Transport)
7378
rt.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36")
@@ -79,16 +84,20 @@ func SearchWithAPI(queries []string) {
7984
if GetFlags().Debug {
8085
TrackAPIRequest("Search.Code", fmt.Sprintf("Query: %s, Page: %d", query, page))
8186
}
87+
8288
result, _, err := client.Search.Code(context.Background(), query, &options)
89+
// fmt.Println(result)
8390
for err != nil {
84-
fmt.Println(err)
91+
// fmt.Println(err)
8592
if strings.Contains(err.Error(), "ERROR_TYPE_QUERY_PARSING_FATAL") {
8693
color.Red("[!] Invalid query: %s (maybe you need to use quotes?)", query)
8794
os.Exit(1)
8895
}
8996
resetTime := extractResetTime(err.Error())
9097
sleepDuration := resetTime + 3
91-
color.Yellow("[!] GitHub API rate limit exceeded. Waiting %d seconds...", sleepDuration)
98+
if !GetFlags().JsonOutput {
99+
color.Yellow("[!] GitHub API rate limit exceeded. Waiting %d seconds...", sleepDuration)
100+
}
92101
time.Sleep(time.Duration(sleepDuration) * time.Second)
93102
if GetFlags().Debug {
94103
TrackAPIRequest("Search.Code", fmt.Sprintf("Query: %s, Page: %d (retry)", query, page))
@@ -124,12 +133,13 @@ func SearchWithAPI(queries []string) {
124133

125134
// Create a repo result object to pass to the worker
126135
repoResult := RepoSearchResult{
127-
Repo: author_repo_str,
128-
File: code_result.GetPath(),
129-
Raw: author_repo_str + "/" + sha + "/" + code_result.GetPath(),
130-
Source: "repo",
131-
Query: query,
132-
URL: "https://github.com/" + author_repo_str + "/blob/" + sha + "/" + code_result.GetPath(),
136+
Repo: author_repo_str,
137+
File: code_result.GetPath(),
138+
Raw: author_repo_str + "/" + sha + "/" + code_result.GetPath(),
139+
Source: "repo",
140+
Query: query,
141+
URL: "https://github.com/" + author_repo_str + "/blob/" + sha + "/" + code_result.GetPath(),
142+
TextMatches: code_result.TextMatches,
133143
}
134144

135145
// Increment the wait group before submitting the job

internal/app/search_ui.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"time"
1515

1616
"github.com/fatih/color"
17+
"github.com/google/go-github/v57/github"
1718
"github.com/spf13/viper"
1819
)
1920

@@ -28,6 +29,7 @@ type RepoSearchResult struct {
2829
URL string
2930
SourceFileLastUpdated string
3031
SourceFileLastAuthorEmail string
32+
TextMatches []*github.TextMatch
3133
searchOptions *SearchOptions
3234
}
3335

0 commit comments

Comments
 (0)