Skip to content

Commit fdb9669

Browse files
Merge pull request #986 from SSWConsulting/strip-slash
♻️ Strip trailing slashes from URLs
2 parents 5f3d89e + fc24566 commit fdb9669

1 file changed

Lines changed: 21 additions & 2 deletions

File tree

docker/sswlinkauditor.go

Lines changed: 21 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,21 @@ func getHref(t html.Token) (ok bool, href string) {
8080
return
8181
}
8282

83+
// stripTrailingSlash returns url with every trailing "/" removed from its
// path component. The scheme, host, query string and fragment are left as
// parsed.
//
// An empty input is returned unchanged. If url cannot be parsed, trailing
// slashes are trimmed from the raw string instead.
//
// Trimming is performed on the escaped form of the path, and Path and
// RawPath are then updated together. Trimming only the decoded Path would
// leave a stale RawPath behind; URL.String would then discard it and
// re-encode from Path, silently rewriting percent-encoded characters such
// as "%2F" into real path separators.
func stripTrailingSlash(url string) string {
	if url == "" {
		return url
	}

	parsed, err := urlP.Parse(url)
	if err != nil {
		// If parsing fails, do simple string trim
		return strings.TrimRight(url, "/")
	}

	// Only literal "/" characters are removed here; an encoded "%2F" at the
	// end of the path is data, not a separator, and is preserved.
	escaped := strings.TrimRight(parsed.EscapedPath(), "/")
	decoded, err := urlP.PathUnescape(escaped)
	if err != nil {
		// EscapedPath always yields a valid encoding, so this is not expected;
		// fall back to the same best-effort trim used for unparsable input.
		return strings.TrimRight(url, "/")
	}

	// Keep both representations in sync so String() emits the escaped form
	// exactly as trimmed.
	parsed.Path, parsed.RawPath = decoded, escaped
	return parsed.String()
}
8398

8499
func addClientHeaders(r *http.Request) {
85100
if r != nil {
@@ -141,9 +156,11 @@ func getRedirectChainFinalUrl(url string) string {
141156
func check(link Link, linkch chan LinkStatus, number int) {
142157
fmt.Println("CHEC", number, link.url)
143158

159+
normalizedUrl := stripTrailingSlash(link.url)
160+
144161
ctx, cancel := context.WithCancel(context.Background())
145162
defer cancel()
146-
r, e := http.NewRequestWithContext(ctx, "GET", link.url, nil)
163+
r, e := http.NewRequestWithContext(ctx, "GET", normalizedUrl, nil)
147164
if r != nil {
148165
addClientHeaders(r)
149166
r.Header.Add("Accept", "*/*")
@@ -220,7 +237,9 @@ func isSameOriginAndPath(baseUrl string, targetUrl string) bool {
220237

221238
func crawl(link Link, ch chan Link, linkch chan LinkStatus, number int) {
222239
fmt.Println("CRAW", number, link.url)
223-
resp, err := noRedirectsClient.Get(link.url)
240+
241+
normalizedUrl := stripTrailingSlash(link.url)
242+
resp, err := noRedirectsClient.Get(normalizedUrl)
224243
dnsErr := new(net.DNSError)
225244

226245
defer func() {

0 commit comments

Comments
 (0)