Skip to content

Commit ce02164

Browse files
committed
Use fewer API calls
1 parent f77f276 commit ce02164

File tree

2 files changed

+137
-48
lines changed

2 files changed

+137
-48
lines changed

internal/app/keyword_scan.go

Lines changed: 4 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package app
22

33
import (
4-
"context"
54
"encoding/base64"
65
"encoding/json"
76
"fmt"
@@ -12,7 +11,6 @@ import (
1211
"sync"
1312

1413
"github.com/fatih/color"
15-
"github.com/google/go-github/v57/github"
1614
)
1715

1816
// ResultScan is the final scan result.
@@ -109,24 +107,8 @@ func ScanAndPrintResult(client *http.Client, repo RepoSearchResult) {
109107
}
110108
}
111109

112-
// Fetch GitHub API info about the repo
113-
token := GetFlags().GithubAccessToken
114-
client := github.NewClient(nil).WithAuthToken(token)
115-
if client != nil {
116-
owner := strings.Split(repo.Repo, "/")[0]
117-
repoName := strings.Split(repo.Repo, "/")[1]
118-
TrackAPIRequest("ListCommits", fmt.Sprintf("Owner: %s, Repo: %s, Path: %s", owner, repoName, repo.File))
119-
commits, _, err := client.Repositories.ListCommits(context.Background(), owner, repoName, &github.CommitsListOptions{
120-
Path: repo.File,
121-
})
122-
if err != nil {
123-
fmt.Println(err)
124-
repo.SourceFileLastUpdated = ""
125-
} else {
126-
repo.SourceFileLastUpdated = commits[0].Commit.Author.Date.String()
127-
repo.SourceFileLastAuthorEmail = *commits[0].Commit.Author.Email
128-
}
129-
}
110+
// Commit info is now populated by the git-based approach in search_api.go
111+
// No need for GitHub API calls here
130112

131113
resultRepoURL := GetRepoURLForSearchResult(repo)
132114

@@ -370,26 +352,8 @@ func ScanAndPrintResult(client *http.Client, repo RepoSearchResult) {
370352

371353
// Process and display matches
372354
if len(matches) > 0 {
373-
// Fetch GitHub API info about the repo
374-
token := GetFlags().GithubAccessToken
375-
client := github.NewClient(nil).WithAuthToken(token)
376-
if client != nil {
377-
// gh_repo_obj, _, err := client.Repositories.Get(strings.Split(repo.Repo, "/")[0], strings.Split(repo.Repo, "/")[1])
378-
// get repo's commits
379-
owner := strings.Split(repo.Repo, "/")[0]
380-
repoName := strings.Split(repo.Repo, "/")[1]
381-
TrackAPIRequest("ListCommits", fmt.Sprintf("Owner: %s, Repo: %s, Path: %s", owner, repoName, repo.File))
382-
commits, _, err := client.Repositories.ListCommits(context.Background(), owner, repoName, &github.CommitsListOptions{
383-
Path: repo.File,
384-
})
385-
if err != nil {
386-
fmt.Println(err)
387-
repo.SourceFileLastUpdated = ""
388-
} else {
389-
repo.SourceFileLastUpdated = commits[0].Commit.Author.Date.String()
390-
repo.SourceFileLastAuthorEmail = *commits[0].Commit.Author.Email
391-
}
392-
}
355+
// Commit info is now populated by the git-based approach in search_api.go
356+
// No need for GitHub API calls here
393357

394358
resultRepoURL := GetRepoURLForSearchResult(repo)
395359
i := 0

internal/app/search_api.go

Lines changed: 133 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"math"
77
"net/http"
88
"os"
9+
"os/exec"
910
"regexp"
1011
"strconv"
1112
"strings"
@@ -122,7 +123,7 @@ func SearchWithAPI(queries []string) {
122123

123124
// Initialize the worker pool if not already done
124125
workerPool := GetGlobalPool()
125-
126+
// fmt.Println(result)
126127
for _, code_result := range result.CodeResults {
127128
// fmt.Println(code_result.GetPath())
128129
author_repo_str := code_result.GetRepository().GetOwner().GetLogin() + "/" + code_result.GetRepository().GetName()
@@ -134,15 +135,24 @@ func SearchWithAPI(queries []string) {
134135
sha = matches[1]
135136
}
136137

138+
// Get file commit information using git operations
139+
lastAuthor, lastUpdated := getFileCommitInfo(
140+
code_result.GetRepository().GetOwner().GetLogin(),
141+
code_result.GetRepository().GetName(),
142+
code_result.GetPath(),
143+
sha)
144+
137145
// Create a repo result object to pass to the worker
138146
repoResult := RepoSearchResult{
139-
Repo: author_repo_str,
140-
File: code_result.GetPath(),
141-
Raw: author_repo_str + "/" + sha + "/" + code_result.GetPath(),
142-
Source: "repo",
143-
Query: query,
144-
URL: "https://github.com/" + author_repo_str + "/blob/" + sha + "/" + code_result.GetPath(),
145-
TextMatches: code_result.TextMatches,
147+
Repo: author_repo_str,
148+
File: code_result.GetPath(),
149+
Raw: author_repo_str + "/" + sha + "/" + code_result.GetPath(),
150+
Source: "repo",
151+
Query: query,
152+
URL: "https://github.com/" + author_repo_str + "/blob/" + sha + "/" + code_result.GetPath(),
153+
SourceFileLastAuthorEmail: lastAuthor,
154+
SourceFileLastUpdated: lastUpdated,
155+
TextMatches: code_result.TextMatches,
146156
}
147157

148158
// Increment the wait group before submitting the job
@@ -170,6 +180,121 @@ func SearchWithAPI(queries []string) {
170180
}
171181
}
172182

183+
// getFileCommitInfo fetches the last commit information for a specific file using git operations
184+
func getFileCommitInfo(owner, repo, path, commitHash string) (lastAuthor, lastUpdated string) {
185+
// Create a temporary directory for git operations
186+
tempDir, err := os.MkdirTemp("", "git-hound-*")
187+
if err != nil {
188+
if GetFlags().Debug {
189+
fmt.Printf("[DEBUG] Error creating temp dir: %v\n", err)
190+
}
191+
return "", ""
192+
}
193+
defer os.RemoveAll(tempDir)
194+
195+
// Initialize empty git repository
196+
initCmd := exec.Command("git", "init")
197+
initCmd.Dir = tempDir
198+
if err := initCmd.Run(); err != nil {
199+
if GetFlags().Debug {
200+
fmt.Printf("[DEBUG] Error initializing git repo: %v\n", err)
201+
}
202+
return "", ""
203+
}
204+
205+
// Add remote origin
206+
remoteCmd := exec.Command("git", "remote", "add", "origin", fmt.Sprintf("https://github.com/%s/%s.git", owner, repo))
207+
remoteCmd.Dir = tempDir
208+
if err := remoteCmd.Run(); err != nil {
209+
if GetFlags().Debug {
210+
fmt.Printf("[DEBUG] Error adding remote: %v\n", err)
211+
}
212+
return "", ""
213+
}
214+
215+
// Fetch only the specific commit using --filter=blob:none
216+
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
217+
defer cancel()
218+
219+
fetchCmd := exec.CommandContext(ctx, "git", "fetch", "origin", commitHash, "--filter=blob:none")
220+
fetchCmd.Dir = tempDir
221+
if err := fetchCmd.Run(); err != nil {
222+
if GetFlags().Debug {
223+
fmt.Printf("[DEBUG] Error fetching commit %s: %v\n", commitHash, err)
224+
}
225+
// Try alternative fetch method without filter
226+
fetchCmd2 := exec.CommandContext(ctx, "git", "fetch", "origin", commitHash)
227+
fetchCmd2.Dir = tempDir
228+
if err2 := fetchCmd2.Run(); err2 != nil {
229+
if GetFlags().Debug {
230+
fmt.Printf("[DEBUG] Error with alternative fetch for commit %s: %v\n", commitHash, err2)
231+
}
232+
return "", ""
233+
}
234+
}
235+
236+
// Get commit metadata using git cat-file
237+
catCmd := exec.CommandContext(ctx, "git", "cat-file", "-p", commitHash)
238+
catCmd.Dir = tempDir
239+
output, err := catCmd.Output()
240+
if err != nil {
241+
if GetFlags().Debug {
242+
fmt.Printf("[DEBUG] Error reading commit %s: %v\n", commitHash, err)
243+
}
244+
return "", ""
245+
}
246+
247+
// Parse commit object to extract author and date
248+
commitText := string(output)
249+
lines := strings.Split(commitText, "\n")
250+
251+
if GetFlags().Debug {
252+
fmt.Printf("[DEBUG] Commit object for %s:\n%s\n", commitHash, commitText)
253+
}
254+
255+
for _, line := range lines {
256+
if strings.HasPrefix(line, "author ") {
257+
// Parse author line: "author Name <email> timestamp timezone"
258+
parts := strings.Fields(line)
259+
if GetFlags().Debug {
260+
fmt.Printf("[DEBUG] Author line parts: %v\n", parts)
261+
}
262+
263+
if len(parts) >= 4 {
264+
// Extract email from <email> format
265+
emailRegex := regexp.MustCompile(`<([^>]+)>`)
266+
emailMatch := emailRegex.FindStringSubmatch(line)
267+
if len(emailMatch) > 1 {
268+
lastAuthor = emailMatch[1]
269+
}
270+
271+
// Extract timestamp (second to last field)
272+
timestampStr := parts[len(parts)-2]
273+
if GetFlags().Debug {
274+
fmt.Printf("[DEBUG] Timestamp string: %s\n", timestampStr)
275+
}
276+
if timestamp, err := strconv.ParseInt(timestampStr, 10, 64); err == nil {
277+
lastUpdated = time.Unix(timestamp, 0).Format(time.RFC3339)
278+
if GetFlags().Debug {
279+
fmt.Printf("[DEBUG] Parsed timestamp: %d -> %s\n", timestamp, lastUpdated)
280+
}
281+
} else {
282+
if GetFlags().Debug {
283+
fmt.Printf("[DEBUG] Error parsing timestamp %s: %v\n", timestampStr, err)
284+
}
285+
}
286+
}
287+
break
288+
}
289+
}
290+
291+
if GetFlags().Debug {
292+
fmt.Printf("[DEBUG] Found commit info for %s/%s/%s (commit %s): author=%s, updated=%s\n", owner, repo, path, commitHash, lastAuthor, lastUpdated)
293+
}
294+
295+
return lastAuthor, lastUpdated
296+
}
297+
173298
// extractResetTime extracts the number of seconds until the rate limit resets from the error message.
174299
func extractResetTime(errorMessage string) int {
175300
re := regexp.MustCompile(`rate reset in (\d+)s`)

0 commit comments

Comments
 (0)