From 83b66372a35fa79345663759d35411f677f2912e Mon Sep 17 00:00:00 2001 From: Nabeel Date: Tue, 18 Nov 2025 18:11:23 +0500 Subject: [PATCH 1/8] Added pagination and retry logic on rate-limit errors in docker registry list images calls --- pkg/sources/docker/registries.go | 334 ++++++++++++++++++++++--------- 1 file changed, 239 insertions(+), 95 deletions(-) diff --git a/pkg/sources/docker/registries.go b/pkg/sources/docker/registries.go index 61ae89146bc1..9ed56dee55cd 100644 --- a/pkg/sources/docker/registries.go +++ b/pkg/sources/docker/registries.go @@ -8,6 +8,7 @@ import ( "net/http" "net/url" "path" + "strconv" "strings" "time" ) @@ -45,9 +46,72 @@ func MakeRegistryFromNamespace(namespace string) Registry { return registry } -// === DockerHub registry === +const ( + maxRetriesOnRateLimit = 5 + maxSleepTime = 30 * time.Second +) + +func requestWithRateLimit(ctx context.Context, method, urlStr string, headers http.Header) (*http.Response, error) { + for attempts := 0; attempts < maxRetriesOnRateLimit; attempts++ { + req, err := http.NewRequestWithContext(ctx, method, urlStr, http.NoBody) + if err != nil { + return nil, err + } + + // Copy headers each time to avoid mutating the caller's map. + for k, vs := range headers { + for _, v := range vs { + req.Header.Add(k, v) + } + } + + resp, err := defaultHTTPClient.Do(req) + if err != nil { + return nil, err + } + + if resp.StatusCode != http.StatusTooManyRequests { + return resp, nil + } + + // We'll honor Retry-After if it's present, otherwise back off a little. + retryAfter := resp.Header.Get("Retry-After") -// DockerHub implements the Registry interface for Docker Hub. + _, _ = io.Copy(io.Discard, resp.Body) + _ = resp.Body.Close() + + sleepFor := 5 * time.Second // 5 second default backoff + + if retryAfter != "" { + // Retry-After can be either seconds or an HTTP date. + if secs, err := strconv.Atoi(retryAfter); err == nil && secs >= 0 { + sleepFor = time.Duration(secs) * time.Second + } else if t, err := http.ParseTime(retryAfter); err == nil { + if duration := time.Until(t); duration > 0 { + sleepFor = duration + } + } + } + + // Rely on our own max sleep time instead of Retry-After, in case it's too long. + if sleepFor > maxSleepTime { + sleepFor = maxSleepTime + } + + // Respect context cancellation while sleeping. + select { + case <-ctx.Done(): + return nil, ctx.Err() + case <-time.After(sleepFor): + } + } + + return nil, fmt.Errorf("rate limited (HTTP status 429) after %d attempts", maxRetriesOnRateLimit) +} + +// === Docker Hub Registry === + +// DockerHub implements the Registry interface for hub.docker.com. type DockerHub struct { Token string } @@ -68,53 +132,64 @@ func (d *DockerHub) WithRegistryToken(registryToken string) { // ListImages lists all images under a Docker Hub namespace using Docker Hub's API. func (d *DockerHub) ListImages(ctx context.Context, namespace string) ([]string, error) { - url := &url.URL{ + baseURL := &url.URL{ Scheme: "https", Host: "hub.docker.com", Path: path.Join("v2", "namespaces", namespace, "repositories"), } - req, err := http.NewRequestWithContext(ctx, http.MethodGet, url.String(), http.NoBody) - if err != nil { - return nil, err - } + allImages := []string{} + nextURL := baseURL.String() + headers := http.Header{} if d.Token != "" { - req.Header.Set("Authorization", "Bearer "+d.Token) + headers.Set("Authorization", "Bearer "+d.Token) } - resp, err := defaultHTTPClient.Do(req) - if err != nil { - return nil, err - } + for nextURL != "" { + resp, err := requestWithRateLimit(ctx, http.MethodGet, nextURL, headers) + if err != nil { + return nil, err + } - defer func() { - _, _ = io.Copy(io.Discard, resp.Body) - _ = resp.Body.Close() - }() + defer func() { + _, _ = io.Copy(io.Discard, resp.Body) + _ = resp.Body.Close() + }() - responseBodyByte, err := io.ReadAll(resp.Body) - if err != nil { - return nil, err - } + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, err + } - switch resp.StatusCode { - case http.StatusOK: - var allImages = make([]string, 0) + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("failed to list dockerhub images: unexpected status code: %d", resp.StatusCode) + } - var listImagesResp dockerhubResp - if err := json.Unmarshal(responseBodyByte, &listImagesResp); err != nil { + var page dockerhubResp + if err := json.Unmarshal(body, &page); err != nil { return nil, err } - for _, image := range listImagesResp.Results { + for _, image := range page.Results { allImages = append(allImages, fmt.Sprintf("%s/%s", namespace, image.Name)) // / } - return allImages, nil - default: - return nil, fmt.Errorf("failed to list dockerhub images: unexpected status code: %d", resp.StatusCode) + if page.Next == "" { + break + } + + // page.Next may be absolute or relative. + next, err := url.Parse(page.Next) + if err != nil { + return nil, err + } + + // ResolveReference handles both absolute and relative URLs. + nextURL = baseURL.ResolveReference(next).String() } + + return allImages, nil } // === Red Hat Quay Registry === @@ -126,7 +201,9 @@ type Quay struct { // quayResp models the JSON structure returned by Quay's /api/v1/repository endpoint. type quayResp struct { - Repositories []Image `json:"repositories"` + Repositories []Image `json:"repositories"` + HasAdditional bool `json:"has_additional"` + Page int `json:"page"` } func (q *Quay) Name() string { @@ -140,59 +217,80 @@ func (q *Quay) WithRegistryToken(registryToken string) { // ListImages lists all images under a Quay namespace. func (q *Quay) ListImages(ctx context.Context, namespace string) ([]string, error) { quayNamespace := path.Base(namespace) // quay.io/ -> namespace - url := &url.URL{ + baseURL := &url.URL{ Scheme: "https", Host: "quay.io", Path: path.Join("api", "v1", "repository"), } - query := url.Query() - query.Set("namespace", quayNamespace) - query.Set("public", "true") - query.Set("private", "true") - url.RawQuery = query.Encode() - - req, err := http.NewRequestWithContext(ctx, http.MethodGet, url.String(), http.NoBody) - if err != nil { - return nil, err - } + allImages := []string{} + nextPageToken := "" + headers := http.Header{} if q.Token != "" { - req.Header.Set("Authorization", "Bearer "+q.Token) + headers.Set("Authorization", "Bearer "+q.Token) } - resp, err := defaultHTTPClient.Do(req) - if err != nil { - return nil, err - } + for { + u := *baseURL + query := u.Query() + query.Set("namespace", quayNamespace) + query.Set("public", "true") + if nextPageToken != "" { + query.Set("next_page", nextPageToken) + } + u.RawQuery = query.Encode() - defer func() { - _, _ = io.Copy(io.Discard, resp.Body) - _ = resp.Body.Close() - }() + resp, err := requestWithRateLimit(ctx, http.MethodGet, u.String(), headers) + if err != nil { + return nil, err + } - responseBodyByte, err := io.ReadAll(resp.Body) - if err != nil { - return nil, err - } + defer func() { + _, _ = io.Copy(io.Discard, resp.Body) + _ = resp.Body.Close() + }() + + body, err := io.ReadAll(resp.Body) + + if err != nil { + return nil, err + } - switch resp.StatusCode { - case http.StatusOK: - var allImages = make([]string, 0) + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("failed to list quay images: unexpected status code: %d", resp.StatusCode) + } - var listImagesResp quayResp - if err := json.Unmarshal(responseBodyByte, &listImagesResp); err != nil { + // Quay includes repositories plus pagination hints in the body. + var page quayResp + if err := json.Unmarshal(body, &page); err != nil { return nil, err } - for _, image := range listImagesResp.Repositories { + for _, image := range page.Repositories { allImages = append(allImages, fmt.Sprintf("%s/%s", namespace, image.Name)) // quay.io// } - return allImages, nil - default: - return nil, fmt.Errorf("failed to list quay images: unexpected status code: %d", resp.StatusCode) + if !page.HasAdditional { + break + } + + // Newer Quay API versions may include "next_page" in the top-level JSON. + // To support that without introducing another struct, fall back to + // parsing it generically when HasAdditional is true. + var raw map[string]any + if err := json.Unmarshal(body, &raw); err != nil { + return nil, err + } + if np, ok := raw["next_page"].(string); ok && np != "" { + nextPageToken = np + } else { + // No token – stop to avoid an infinite loop. + break + } } + + return allImages, nil } // === GHCR Registry === @@ -210,60 +308,106 @@ func (g *GHCR) WithRegistryToken(registryToken string) { g.Token = registryToken } -// ListImages lists all images under a Quay namespace. +// parseNextLinkURL extracts the URL with rel="next" from a GitHub Link header, if present. +func parseNextLinkURL(linkHeader string) string { + if linkHeader == "" { + return "" + } + + parts := strings.Split(linkHeader, ",") + for _, part := range parts { + section := strings.Split(strings.TrimSpace(part), ";") + if len(section) < 2 { + continue + } + + linkPart := strings.TrimSpace(section[0]) + if !strings.HasPrefix(linkPart, "<") || !strings.HasSuffix(linkPart, ">") { + continue + } + urlStr := strings.Trim(linkPart, "<>") + + rel := "" + for _, attr := range section[1:] { + attr = strings.TrimSpace(attr) + if strings.HasPrefix(attr, "rel=") { + rel = strings.Trim(strings.TrimPrefix(attr, "rel="), "\"") + break + } + } + + if rel == "next" { + return urlStr + } + } + + return "" +} + +// ListImages lists all images under a GHCR namespace. func (g *GHCR) ListImages(ctx context.Context, namespace string) ([]string, error) { ghcrNamespace := path.Base(namespace) // ghcr.io/ -> namespace - url := &url.URL{ + // Default to "users", which works for user namespaces. Organisation support + // can be added in the future if needed. + baseURL := &url.URL{ Scheme: "https", Host: "api.github.com", Path: path.Join("users", ghcrNamespace, "packages"), } - query := url.Query() - query.Set("package_type", "container") - url.RawQuery = query.Encode() + allImages := []string{} - req, err := http.NewRequestWithContext(ctx, http.MethodGet, url.String(), http.NoBody) - if err != nil { - return nil, err - } + nextURL := func() string { + u := *baseURL + q := u.Query() + q.Set("package_type", "container") + q.Set("per_page", "100") + u.RawQuery = q.Encode() + return u.String() + }() - // https://stackoverflow.com/questions/72732582/using-github-packages-without-personal-access-token + headers := http.Header{} if g.Token != "" { - req.Header.Set("Authorization", "Bearer "+g.Token) + headers.Set("Authorization", "Bearer "+g.Token) } + // GitHub recommends explicitly sending the v3 media type. + headers.Set("Accept", "application/vnd.github+json") - resp, err := defaultHTTPClient.Do(req) - if err != nil { - return nil, err - } + for nextURL != "" { + resp, err := requestWithRateLimit(ctx, http.MethodGet, nextURL, headers) + if err != nil { + return nil, err + } - defer func() { - _, _ = io.Copy(io.Discard, resp.Body) - _ = resp.Body.Close() - }() + defer func() { + _, _ = io.Copy(io.Discard, resp.Body) + _ = resp.Body.Close() + }() - responseBodyByte, err := io.ReadAll(resp.Body) - if err != nil { - return nil, err - } + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, err + } - switch resp.StatusCode { - case http.StatusOK: - var allImages = make([]string, 0) + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("failed to list ghcr images: unexpected status code: %d", resp.StatusCode) + } - var listImagesResp []Image - if err := json.Unmarshal(responseBodyByte, &listImagesResp); err != nil { + // The GHCR packages list returns an array of package objects. We only + // care about the "name" field at this layer, so reuse the Image struct. + var page []Image + if err := json.Unmarshal(body, &page); err != nil { return nil, err } - for _, image := range listImagesResp { + for _, image := range page { allImages = append(allImages, fmt.Sprintf("%s/%s", namespace, image.Name)) // ghcr.io// } - return allImages, nil - default: - return nil, fmt.Errorf("failed to list ghcr images: unexpected status code: %d", resp.StatusCode) + link := resp.Header.Get("Link") + nextURL = parseNextLinkURL(link) } + + return allImages, nil } From 48477ffd076bc17d70fd011057a08869277055c5 Mon Sep 17 00:00:00 2001 From: Nabeel Date: Wed, 19 Nov 2025 13:54:36 +0500 Subject: [PATCH 2/8] added rate limit error failure test --- pkg/sources/docker/registries.go | 48 ++++++++++++++++++++------- pkg/sources/docker/registries_test.go | 41 +++++++++++++++++++++++ 2 files changed, 77 insertions(+), 12 deletions(-) diff --git a/pkg/sources/docker/registries.go b/pkg/sources/docker/registries.go index 9ed56dee55cd..af24158f765d 100644 --- a/pkg/sources/docker/registries.go +++ b/pkg/sources/docker/registries.go @@ -23,10 +23,10 @@ type Image struct { // Registry is an interface for any Docker/OCI registry implementation that can list all images under a given namespace. type Registry interface { - Name() string // return name of the registry - WithRegistryToken(registryToken string) // set token for registry - // TODO: Handle pagination and rate limits for list images API Call + Name() string // return name of the registry + WithRegistryToken(registryToken string) // set token for registry ListImages(ctx context.Context, namespace string) ([]string, error) // list all images + WithClient() *http.Client // return the HTTP client to use } // MakeRegistryFromNamespace returns a Registry implementation @@ -51,7 +51,7 @@ const ( maxSleepTime = 30 * time.Second ) -func requestWithRateLimit(ctx context.Context, method, urlStr string, headers http.Header) (*http.Response, error) { +func requestWithRateLimit(ctx context.Context, client *http.Client, method, urlStr string, headers http.Header) (*http.Response, error) { for attempts := 0; attempts < maxRetriesOnRateLimit; attempts++ { req, err := http.NewRequestWithContext(ctx, method, urlStr, http.NoBody) if err != nil { @@ -65,7 +65,7 @@ func requestWithRateLimit(ctx context.Context, method, urlStr string, headers ht } } - resp, err := defaultHTTPClient.Do(req) + resp, err := client.Do(req) if err != nil { return nil, err } @@ -74,7 +74,7 @@ func requestWithRateLimit(ctx context.Context, method, urlStr string, headers ht return resp, nil } - // We'll honor Retry-After if it's present, otherwise back off a little. + // Honor Retry-After if it's present, otherwise back off a little. retryAfter := resp.Header.Get("Retry-After") _, _ = io.Copy(io.Discard, resp.Body) @@ -113,7 +113,8 @@ func requestWithRateLimit(ctx context.Context, method, urlStr string, headers ht // DockerHub implements the Registry interface for hub.docker.com. type DockerHub struct { - Token string + Token string + Client *http.Client } // dockerhubResp models Docker Hub's /v2/namespaces//repositories API response. @@ -130,6 +131,13 @@ func (d *DockerHub) WithRegistryToken(registryToken string) { d.Token = registryToken } +func (d *DockerHub) WithClient() *http.Client { + if d != nil && d.Client != nil { + return d.Client + } + return defaultHTTPClient +} + // ListImages lists all images under a Docker Hub namespace using Docker Hub's API. func (d *DockerHub) ListImages(ctx context.Context, namespace string) ([]string, error) { baseURL := &url.URL{ @@ -147,7 +155,7 @@ func (d *DockerHub) ListImages(ctx context.Context, namespace string) ([]string, } for nextURL != "" { - resp, err := requestWithRateLimit(ctx, http.MethodGet, nextURL, headers) + resp, err := requestWithRateLimit(ctx, d.WithClient(), http.MethodGet, nextURL, headers) if err != nil { return nil, err } @@ -196,7 +204,8 @@ func (d *DockerHub) ListImages(ctx context.Context, namespace string) ([]string, // Quay implements the Registry interface for Quay.io. type Quay struct { - Token string + Token string + Client *http.Client } // quayResp models the JSON structure returned by Quay's /api/v1/repository endpoint. @@ -214,6 +223,13 @@ func (q *Quay) WithRegistryToken(registryToken string) { q.Token = registryToken } +func (q *Quay) WithClient() *http.Client { + if q != nil && q.Client != nil { + return q.Client + } + return defaultHTTPClient +} + // ListImages lists all images under a Quay namespace. func (q *Quay) ListImages(ctx context.Context, namespace string) ([]string, error) { quayNamespace := path.Base(namespace) // quay.io/ -> namespace @@ -241,7 +257,7 @@ func (q *Quay) ListImages(ctx context.Context, namespace string) ([]string, erro } u.RawQuery = query.Encode() - resp, err := requestWithRateLimit(ctx, http.MethodGet, u.String(), headers) + resp, err := requestWithRateLimit(ctx, q.WithClient(), http.MethodGet, u.String(), headers) if err != nil { return nil, err } @@ -297,7 +313,8 @@ func (q *Quay) ListImages(ctx context.Context, namespace string) ([]string, erro // GHCR implements the Registry interface for GHCR.io. type GHCR struct { - Token string // https://github.com/github/roadmap/issues/558 + Token string // https://github.com/github/roadmap/issues/558 + Client *http.Client } func (g *GHCR) Name() string { @@ -308,6 +325,13 @@ func (g *GHCR) WithRegistryToken(registryToken string) { g.Token = registryToken } +func (g *GHCR) WithClient() *http.Client { + if g != nil && g.Client != nil { + return g.Client + } + return defaultHTTPClient +} + // parseNextLinkURL extracts the URL with rel="next" from a GitHub Link header, if present. func parseNextLinkURL(linkHeader string) string { if linkHeader == "" { @@ -375,7 +399,7 @@ func (g *GHCR) ListImages(ctx context.Context, namespace string) ([]string, erro headers.Set("Accept", "application/vnd.github+json") for nextURL != "" { - resp, err := requestWithRateLimit(ctx, http.MethodGet, nextURL, headers) + resp, err := requestWithRateLimit(ctx, g.WithClient(), http.MethodGet, nextURL, headers) if err != nil { return nil, err } diff --git a/pkg/sources/docker/registries_test.go b/pkg/sources/docker/registries_test.go index 657cf64a7b9c..6445fbaa3686 100644 --- a/pkg/sources/docker/registries_test.go +++ b/pkg/sources/docker/registries_test.go @@ -2,6 +2,7 @@ package docker import ( "fmt" + "net/http" "slices" "testing" @@ -60,3 +61,43 @@ func TestGHCRListImages(t *testing.T) { assert.Equal(t, ghcrImages, []string{"ghcr.io/mongodb/kingfisher"}) } + +func TestDockerHubListImages_RateLimitError(t *testing.T) { + t.Parallel() + + // Dockerhub registry + dockerhub := MakeRegistryFromNamespace("trufflesecurity") // no authentication + + // Cast dockerhub to *DockerHub registry to override the HTTP client + dockerhub.(*DockerHub).Client = common.ConstantResponseHttpClient(http.StatusTooManyRequests, "{}") + + dockerImages, err := dockerhub.ListImages(context.Background(), "trufflesecurity") // namespace without any prefix defaults to dockerhub registry + assert.Error(t, err) + assert.Nil(t, dockerImages) +} + +func TestQuayListImages_RateLimitError(t *testing.T) { + t.Parallel() + + // Quay.io registry + quay := MakeRegistryFromNamespace("quay.io/truffledockerman") // no authentication + // Cast quay to *Quay registry to override the HTTP client + quay.(*Quay).Client = common.ConstantResponseHttpClient(http.StatusTooManyRequests, "{}") + + quayImages, err := quay.ListImages(context.Background(), "quay.io/truffledockerman") + assert.Error(t, err) + assert.Nil(t, quayImages) +} + +func TestGHCRListImages_RateLimitError(t *testing.T) { + t.Parallel() + + // GHCR registry + ghcr := MakeRegistryFromNamespace("ghcr.io/mongodb") // no authentication + // Cast ghcr to *GHCR registry to override the HTTP client + ghcr.(*GHCR).Client = common.ConstantResponseHttpClient(http.StatusTooManyRequests, "{}") + + ghcrImages, err := ghcr.ListImages(context.Background(), "ghcr.io/mongodb") + assert.Error(t, err) + assert.Nil(t, ghcrImages) +} From 3599c7c1f05cb322dc3c1ca09a38c7204cf9a16c Mon Sep 17 00:00:00 2001 From: Nabeel Date: Wed, 19 Nov 2025 16:04:24 +0500 Subject: [PATCH 3/8] added exponential backoff --- pkg/sources/docker/registries.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pkg/sources/docker/registries.go b/pkg/sources/docker/registries.go index af24158f765d..66a6b7c8e968 100644 --- a/pkg/sources/docker/registries.go +++ b/pkg/sources/docker/registries.go @@ -47,8 +47,9 @@ func MakeRegistryFromNamespace(namespace string) Registry { } const ( - maxRetriesOnRateLimit = 5 - maxSleepTime = 30 * time.Second + maxRetriesOnRateLimit = 5 // maximum number of retries on HTTP 429 responses + maxSleepTime = 30 * time.Second // maximum allowed sleep time between retries + initialBackoff = 1 * time.Second // initial backoff time before retrying ) func requestWithRateLimit(ctx context.Context, client *http.Client, method, urlStr string, headers http.Header) (*http.Response, error) { @@ -80,7 +81,7 @@ func requestWithRateLimit(ctx context.Context, client *http.Client, method, urlS _, _ = io.Copy(io.Discard, resp.Body) _ = resp.Body.Close() - sleepFor := 5 * time.Second // 5 second default backoff + sleepFor := initialBackoff * (1 << attempts) // Exponentially increase backoff time if retryAfter != "" { // Retry-After can be either seconds or an HTTP date. From b97b6f0f3516e5ed573d61c1b56131e9154c2997 Mon Sep 17 00:00:00 2001 From: Nabeel Date: Thu, 20 Nov 2025 13:25:57 +0500 Subject: [PATCH 4/8] updated defered response body discards, added more comments, removed some unnecessary logic in Quay.ListImages() --- pkg/sources/docker/registries.go | 54 +++++++++++++------------------- 1 file changed, 22 insertions(+), 32 deletions(-) diff --git a/pkg/sources/docker/registries.go b/pkg/sources/docker/registries.go index 66a6b7c8e968..96ac9ffdd5e4 100644 --- a/pkg/sources/docker/registries.go +++ b/pkg/sources/docker/registries.go @@ -161,12 +161,8 @@ func (d *DockerHub) ListImages(ctx context.Context, namespace string) ([]string, return nil, err } - defer func() { - _, _ = io.Copy(io.Discard, resp.Body) - _ = resp.Body.Close() - }() - body, err := io.ReadAll(resp.Body) + discardBody(resp) if err != nil { return nil, err } @@ -213,6 +209,7 @@ type Quay struct { type quayResp struct { Repositories []Image `json:"repositories"` HasAdditional bool `json:"has_additional"` + NextPage string `json:"next_page"` Page int `json:"page"` } @@ -248,6 +245,8 @@ func (q *Quay) ListImages(ctx context.Context, namespace string) ([]string, erro headers.Set("Authorization", "Bearer "+q.Token) } + // Loop through paginated results that is handled via the "next_page" query parameter + // Reference: https://docs.redhat.com/en/documentation/red_hat_quay/3.10/html/use_red_hat_quay/using_the_red_hat_quay_api#example_for_pagination for { u := *baseURL query := u.Query() @@ -263,13 +262,8 @@ func (q *Quay) ListImages(ctx context.Context, namespace string) ([]string, erro return nil, err } - defer func() { - _, _ = io.Copy(io.Discard, resp.Body) - _ = resp.Body.Close() - }() - body, err := io.ReadAll(resp.Body) - + discardBody(resp) if err != nil { return nil, err } @@ -288,23 +282,10 @@ func (q *Quay) ListImages(ctx context.Context, namespace string) ([]string, erro allImages = append(allImages, fmt.Sprintf("%s/%s", namespace, image.Name)) // quay.io// } - if !page.HasAdditional { - break - } - - // Newer Quay API versions may include "next_page" in the top-level JSON. - // To support that without introducing another struct, fall back to - // parsing it generically when HasAdditional is true. - var raw map[string]any - if err := json.Unmarshal(body, &raw); err != nil { - return nil, err - } - if np, ok := raw["next_page"].(string); ok && np != "" { - nextPageToken = np - } else { - // No token – stop to avoid an infinite loop. + if !page.HasAdditional || page.NextPage == "" { break } + nextPageToken = page.NextPage } return allImages, nil @@ -333,7 +314,11 @@ func (g *GHCR) WithClient() *http.Client { return defaultHTTPClient } -// parseNextLinkURL extracts the URL with rel="next" from a GitHub Link header, if present. +// GHCR paginates results and includes pagination links in the HTTP Link header. +// The Link header contains URLs for "next", "prev", "first", and "last" pages. +// Example Link header: +// ; rel="next", +// ; rel="last" func parseNextLinkURL(linkHeader string) string { if linkHeader == "" { return "" @@ -405,12 +390,8 @@ func (g *GHCR) ListImages(ctx context.Context, namespace string) ([]string, erro return nil, err } - defer func() { - _, _ = io.Copy(io.Discard, resp.Body) - _ = resp.Body.Close() - }() - body, err := io.ReadAll(resp.Body) + discardBody(resp) if err != nil { return nil, err } @@ -430,9 +411,18 @@ func (g *GHCR) ListImages(ctx context.Context, namespace string) ([]string, erro allImages = append(allImages, fmt.Sprintf("%s/%s", namespace, image.Name)) // ghcr.io// } + // Check Link header for next page URL. link := resp.Header.Get("Link") nextURL = parseNextLinkURL(link) } return allImages, nil } + +// Function to discard response body +func discardBody(resp *http.Response) { + if resp != nil && resp.Body != nil { + _, _ = io.Copy(io.Discard, resp.Body) + _ = resp.Body.Close() + } +} From 99de7275891a2039bd1ed9acfc2e194f495a3de6 Mon Sep 17 00:00:00 2001 From: Nabeel Date: Thu, 20 Nov 2025 13:36:21 +0500 Subject: [PATCH 5/8] readded quay list images query that the requests private images --- pkg/sources/docker/registries.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/sources/docker/registries.go b/pkg/sources/docker/registries.go index 96ac9ffdd5e4..9dfdc3e82e04 100644 --- a/pkg/sources/docker/registries.go +++ b/pkg/sources/docker/registries.go @@ -252,6 +252,7 @@ func (q *Quay) ListImages(ctx context.Context, namespace string) ([]string, erro query := u.Query() query.Set("namespace", quayNamespace) query.Set("public", "true") + query.Set("private", "true") if nextPageToken != "" { query.Set("next_page", nextPageToken) } From 44e4d1e8d02c4223e9bb97763f65376d30e667d2 Mon Sep 17 00:00:00 2001 From: Nabeel Date: Wed, 26 Nov 2025 14:54:54 +0500 Subject: [PATCH 6/8] added golang.org/x/time/rate to limit registry API calling rate --- pkg/sources/docker/registries.go | 213 ++++++++++++-------------- pkg/sources/docker/registries_test.go | 7 +- 2 files changed, 101 insertions(+), 119 deletions(-) diff --git a/pkg/sources/docker/registries.go b/pkg/sources/docker/registries.go index 9dfdc3e82e04..f47045e2d354 100644 --- a/pkg/sources/docker/registries.go +++ b/pkg/sources/docker/registries.go @@ -8,9 +8,10 @@ import ( "net/http" "net/url" "path" - "strconv" "strings" "time" + + "golang.org/x/time/rate" ) // defaultHTTPClient defines a shared HTTP client with timeout for all registry requests. @@ -21,12 +22,17 @@ type Image struct { Name string `json:"name"` } +// registryRateLimiter limits how quickly we make registry API calls across all registries. +// We allow roughly 5 requests every ~7.5 seconds (one token every 1.5s) as a simple +// safeguard against overloading upstream APIs. +var registryRateLimiter = rate.NewLimiter(rate.Every(1500*time.Millisecond), 1) + // Registry is an interface for any Docker/OCI registry implementation that can list all images under a given namespace. type Registry interface { Name() string // return name of the registry WithRegistryToken(registryToken string) // set token for registry ListImages(ctx context.Context, namespace string) ([]string, error) // list all images - WithClient() *http.Client // return the HTTP client to use + WithClient(client *http.Client) // return the HTTP client to use } // MakeRegistryFromNamespace returns a Registry implementation @@ -46,70 +52,6 @@ func MakeRegistryFromNamespace(namespace string) Registry { return registry } -const ( - maxRetriesOnRateLimit = 5 // maximum number of retries on HTTP 429 responses - maxSleepTime = 30 * time.Second // maximum allowed sleep time between retries - initialBackoff = 1 * time.Second // initial backoff time before retrying -) - -func requestWithRateLimit(ctx context.Context, client *http.Client, method, urlStr string, headers http.Header) (*http.Response, error) { - for attempts := 0; attempts < maxRetriesOnRateLimit; attempts++ { - req, err := http.NewRequestWithContext(ctx, method, urlStr, http.NoBody) - if err != nil { - return nil, err - } - - // Copy headers each time to avoid mutating the caller's map. - for k, vs := range headers { - for _, v := range vs { - req.Header.Add(k, v) - } - } - - resp, err := client.Do(req) - if err != nil { - return nil, err - } - - if resp.StatusCode != http.StatusTooManyRequests { - return resp, nil - } - - // Honor Retry-After if it's present, otherwise back off a little. - retryAfter := resp.Header.Get("Retry-After") - - _, _ = io.Copy(io.Discard, resp.Body) - _ = resp.Body.Close() - - sleepFor := initialBackoff * (1 << attempts) // Exponentially increase backoff time - - if retryAfter != "" { - // Retry-After can be either seconds or an HTTP date. - if secs, err := strconv.Atoi(retryAfter); err == nil && secs >= 0 { - sleepFor = time.Duration(secs) * time.Second - } else if t, err := http.ParseTime(retryAfter); err == nil { - if duration := time.Until(t); duration > 0 { - sleepFor = duration - } - } - } - - // Rely on our own max sleep time instead of Retry-After, in case it's too long. - if sleepFor > maxSleepTime { - sleepFor = maxSleepTime - } - - // Respect context cancellation while sleeping. - select { - case <-ctx.Done(): - return nil, ctx.Err() - case <-time.After(sleepFor): - } - } - - return nil, fmt.Errorf("rate limited (HTTP status 429) after %d attempts", maxRetriesOnRateLimit) -} - // === Docker Hub Registry === // DockerHub implements the Registry interface for hub.docker.com. @@ -132,14 +74,15 @@ func (d *DockerHub) WithRegistryToken(registryToken string) { d.Token = registryToken } -func (d *DockerHub) WithClient() *http.Client { - if d != nil && d.Client != nil { - return d.Client - } - return defaultHTTPClient +func (d *DockerHub) WithClient(client *http.Client) { + d.Client = client } // ListImages lists all images under a Docker Hub namespace using Docker Hub's API. +// +// We fetch images in fixed-size pages and keep following the "next" link until there +// are no more pages. A shared rate limiter is used so we don't accidentally hammer +// the Docker Hub API. func (d *DockerHub) ListImages(ctx context.Context, namespace string) ([]string, error) { baseURL := &url.URL{ Scheme: "https", @@ -147,16 +90,32 @@ func (d *DockerHub) ListImages(ctx context.Context, namespace string) ([]string, Path: path.Join("v2", "namespaces", namespace, "repositories"), } + query := baseURL.Query() + query.Set("page_size", "100") // fetch images in batches of 100 per page + baseURL.RawQuery = query.Encode() + allImages := []string{} nextURL := baseURL.String() - headers := http.Header{} - if d.Token != "" { - headers.Set("Authorization", "Bearer "+d.Token) - } + for { + if err := registryRateLimiter.Wait(ctx); err != nil { + return nil, err + } - for nextURL != "" { - resp, err := requestWithRateLimit(ctx, d.WithClient(), http.MethodGet, nextURL, headers) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, nextURL, http.NoBody) + if err != nil { + return nil, err + } + + if d.Token != "" { + req.Header.Set("Authorization", "Bearer "+d.Token) + } + + client := d.Client + if client == nil { + client = defaultHTTPClient + } + resp, err := client.Do(req) if err != nil { return nil, err } @@ -184,13 +143,13 @@ func (d *DockerHub) ListImages(ctx context.Context, namespace string) ([]string, break } - // page.Next may be absolute or relative. + // Docker Hub sometimes returns an absolute "next" URL and sometimes a + // relative one. ResolveReference cleans that up for us and turns whatever + // they send into a proper URL we can call. next, err := url.Parse(page.Next) if err != nil { return nil, err } - - // ResolveReference handles both absolute and relative URLs. nextURL = baseURL.ResolveReference(next).String() } @@ -210,7 +169,6 @@ type quayResp struct { Repositories []Image `json:"repositories"` HasAdditional bool `json:"has_additional"` NextPage string `json:"next_page"` - Page int `json:"page"` } func (q *Quay) Name() string { @@ -221,14 +179,16 @@ func (q *Quay) WithRegistryToken(registryToken string) { q.Token = registryToken } -func (q *Quay) WithClient() *http.Client { - if q != nil && q.Client != nil { - return q.Client - } - return defaultHTTPClient +func (q *Quay) WithClient(client *http.Client) { + q.Client = client } // ListImages lists all images under a Quay namespace. +// API reference: +// +// GET https://quay.io/api/v1/repository?namespace=&public=true&private=true&next_page= +// +// We keep following next_page while has_additional is true. func (q *Quay) ListImages(ctx context.Context, namespace string) ([]string, error) { quayNamespace := path.Base(namespace) // quay.io/ -> namespace baseURL := &url.URL{ @@ -240,25 +200,37 @@ func (q *Quay) ListImages(ctx context.Context, namespace string) ([]string, erro allImages := []string{} nextPageToken := "" - headers := http.Header{} - if q.Token != "" { - headers.Set("Authorization", "Bearer "+q.Token) - } - - // Loop through paginated results that is handled via the "next_page" query parameter - // Reference: https://docs.redhat.com/en/documentation/red_hat_quay/3.10/html/use_red_hat_quay/using_the_red_hat_quay_api#example_for_pagination for { + if err := registryRateLimiter.Wait(ctx); err != nil { + return nil, err + } + u := *baseURL query := u.Query() query.Set("namespace", quayNamespace) query.Set("public", "true") query.Set("private", "true") + + // Quay's API controls page size internally; we just fetch page by page. if nextPageToken != "" { query.Set("next_page", nextPageToken) } u.RawQuery = query.Encode() - resp, err := requestWithRateLimit(ctx, q.WithClient(), http.MethodGet, u.String(), headers) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), http.NoBody) + if err != nil { + return nil, err + } + + if q.Token != "" { + req.Header.Set("Authorization", "Bearer "+q.Token) + } + + client := q.Client + if client == nil { + client = defaultHTTPClient + } + resp, err := client.Do(req) if err != nil { return nil, err } @@ -273,7 +245,6 @@ func (q *Quay) ListImages(ctx context.Context, namespace string) ([]string, erro return nil, fmt.Errorf("failed to list quay images: unexpected status code: %d", resp.StatusCode) } - // Quay includes repositories plus pagination hints in the body. var page quayResp if err := json.Unmarshal(body, &page); err != nil { return nil, err @@ -308,16 +279,14 @@ func (g *GHCR) WithRegistryToken(registryToken string) { g.Token = registryToken } -func (g *GHCR) WithClient() *http.Client { - if g != nil && g.Client != nil { - return g.Client - } - return defaultHTTPClient +func (g *GHCR) WithClient(client *http.Client) { + g.Client = client } // GHCR paginates results and includes pagination links in the HTTP Link header. // The Link header contains URLs for "next", "prev", "first", and "last" pages. // Example Link header: +// // ; rel="next", // ; rel="last" func parseNextLinkURL(linkHeader string) string { @@ -356,11 +325,14 @@ func parseNextLinkURL(linkHeader string) string { } // ListImages lists all images under a GHCR namespace. +// For GitHub Container Registry the package listing endpoint is: +// +// GET https://api.github.com/users/{namespace}/packages?package_type=container&per_page=100 +// +// The GitHub API is paginated via the Link response header. func (g *GHCR) ListImages(ctx context.Context, namespace string) ([]string, error) { ghcrNamespace := path.Base(namespace) // ghcr.io/ -> namespace - // Default to "users", which works for user namespaces. Organisation support - // can be added in the future if needed. baseURL := &url.URL{ Scheme: "https", Host: "api.github.com", @@ -368,25 +340,37 @@ func (g *GHCR) ListImages(ctx context.Context, namespace string) ([]string, erro } allImages := []string{} - nextURL := func() string { u := *baseURL q := u.Query() q.Set("package_type", "container") - q.Set("per_page", "100") + q.Set("per_page", "100") // fetch images in batches of 100 per page u.RawQuery = q.Encode() return u.String() }() - headers := http.Header{} - if g.Token != "" { - headers.Set("Authorization", "Bearer "+g.Token) - } - // GitHub recommends explicitly sending the v3 media type. - headers.Set("Accept", "application/vnd.github+json") - for nextURL != "" { - resp, err := requestWithRateLimit(ctx, g.WithClient(), http.MethodGet, nextURL, headers) + if err := registryRateLimiter.Wait(ctx); err != nil { + return nil, err + } + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, nextURL, http.NoBody) + if err != nil { + return nil, err + } + + // https://stackoverflow.com/questions/72732582/using-github-packages-without-personal-access-token + if g.Token != "" { + req.Header.Set("Authorization", "Bearer "+g.Token) + } + // GitHub recommends explicitly sending the v3 media type. + req.Header.Set("Accept", "application/vnd.github+json") + + client := g.Client + if client == nil { + client = defaultHTTPClient + } + resp, err := client.Do(req) if err != nil { return nil, err } @@ -412,9 +396,8 @@ func (g *GHCR) ListImages(ctx context.Context, namespace string) ([]string, erro allImages = append(allImages, fmt.Sprintf("%s/%s", namespace, image.Name)) // ghcr.io// } - // Check Link header for next page URL. - link := resp.Header.Get("Link") - nextURL = parseNextLinkURL(link) + // Determine if there's another page via the Link header. + nextURL = parseNextLinkURL(resp.Header.Get("Link")) } return allImages, nil diff --git a/pkg/sources/docker/registries_test.go b/pkg/sources/docker/registries_test.go index 6445fbaa3686..f6f3c04047dc 100644 --- a/pkg/sources/docker/registries_test.go +++ b/pkg/sources/docker/registries_test.go @@ -69,8 +69,7 @@ func TestDockerHubListImages_RateLimitError(t *testing.T) { dockerhub := MakeRegistryFromNamespace("trufflesecurity") // no authentication // Cast dockerhub to *DockerHub registry to override the HTTP client - dockerhub.(*DockerHub).Client = common.ConstantResponseHttpClient(http.StatusTooManyRequests, "{}") - + dockerhub.WithClient(common.ConstantResponseHttpClient(http.StatusTooManyRequests, "{}")) dockerImages, err := dockerhub.ListImages(context.Background(), "trufflesecurity") // namespace without any prefix defaults to dockerhub registry assert.Error(t, err) assert.Nil(t, dockerImages) @@ -82,7 +81,7 @@ func TestQuayListImages_RateLimitError(t *testing.T) { // Quay.io registry quay := MakeRegistryFromNamespace("quay.io/truffledockerman") // no authentication // Cast quay to *Quay registry to override the HTTP client - quay.(*Quay).Client = common.ConstantResponseHttpClient(http.StatusTooManyRequests, "{}") + quay.WithClient(common.ConstantResponseHttpClient(http.StatusTooManyRequests, "{}")) quayImages, err := quay.ListImages(context.Background(), "quay.io/truffledockerman") assert.Error(t, err) @@ -95,7 +94,7 @@ func TestGHCRListImages_RateLimitError(t *testing.T) { // GHCR registry ghcr := MakeRegistryFromNamespace("ghcr.io/mongodb") // no authentication // Cast ghcr to *GHCR registry to override the HTTP client - ghcr.(*GHCR).Client = common.ConstantResponseHttpClient(http.StatusTooManyRequests, "{}") + ghcr.WithClient(common.ConstantResponseHttpClient(http.StatusTooManyRequests, "{}")) ghcrImages, err := ghcr.ListImages(context.Background(), "ghcr.io/mongodb") assert.Error(t, err) From 167a1e95a431909f7f308cdb82a47b991d7fb8f2 Mon Sep 17 00:00:00 2001 From: Nabeel Date: Tue, 2 Dec 2025 10:55:32 +0500 Subject: [PATCH 7/8] switched to using retryable http client in docker registry calls --- pkg/sources/docker/registries.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pkg/sources/docker/registries.go b/pkg/sources/docker/registries.go index f47045e2d354..e4f19e9bc052 100644 --- a/pkg/sources/docker/registries.go +++ b/pkg/sources/docker/registries.go @@ -11,22 +11,24 @@ import ( "strings" "time" + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "golang.org/x/time/rate" ) // defaultHTTPClient defines a shared HTTP client with timeout for all registry requests. -var defaultHTTPClient = &http.Client{Timeout: 10 * time.Second} - -// Image represents a container image or repository entry in a registry API response. -type Image struct { - Name string `json:"name"` -} +var defaultHTTPClient = common.RetryableHTTPClientTimeout(10) // registryRateLimiter limits how quickly we make registry API calls across all registries. // We allow roughly 5 requests every ~7.5 seconds (one token every 1.5s) as a simple // safeguard against overloading upstream APIs. var registryRateLimiter = rate.NewLimiter(rate.Every(1500*time.Millisecond), 1) +// Image represents a container image or repository entry in a registry API response. +type Image struct { + Name string `json:"name"` +} + // Registry is an interface for any Docker/OCI registry implementation that can list all images under a given namespace. type Registry interface { Name() string // return name of the registry From 28d6db56f3a12f91f63df742a51bbd40bfd3151e Mon Sep 17 00:00:00 2001 From: Nabeel Date: Fri, 5 Dec 2025 15:08:34 +0500 Subject: [PATCH 8/8] modified rate limiter. created constant for max page size in api request --- pkg/sources/docker/registries.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pkg/sources/docker/registries.go b/pkg/sources/docker/registries.go index e4f19e9bc052..b4402c50835b 100644 --- a/pkg/sources/docker/registries.go +++ b/pkg/sources/docker/registries.go @@ -9,7 +9,6 @@ import ( "net/url" "path" "strings" - "time" "github.com/trufflesecurity/trufflehog/v3/pkg/common" @@ -20,9 +19,11 @@ import ( var defaultHTTPClient = common.RetryableHTTPClientTimeout(10) // registryRateLimiter limits how quickly we make registry API calls across all registries. -// We allow roughly 5 requests every ~7.5 seconds (one token every 1.5s) as a simple -// safeguard against overloading upstream APIs. -var registryRateLimiter = rate.NewLimiter(rate.Every(1500*time.Millisecond), 1) +// We allow roughly 1 event every 1.5s, with a burst of 2 as a simple safeguard against overloading upstream APIs. +var registryRateLimiter = rate.NewLimiter(rate.Limit(2.0/3.0), 2) + +// maxRegistryPageSize defines the maximum number of images to request per page from a registry API. +const maxRegistryPageSize = 100 // Image represents a container image or repository entry in a registry API response. type Image struct { @@ -93,7 +94,7 @@ func (d *DockerHub) ListImages(ctx context.Context, namespace string) ([]string, } query := baseURL.Query() - query.Set("page_size", "100") // fetch images in batches of 100 per page + query.Set("page_size", fmt.Sprint(maxRegistryPageSize)) baseURL.RawQuery = query.Encode() allImages := []string{} @@ -346,7 +347,7 @@ func (g *GHCR) ListImages(ctx context.Context, namespace string) ([]string, erro u := *baseURL q := u.Query() q.Set("package_type", "container") - q.Set("per_page", "100") // fetch images in batches of 100 per page + q.Set("per_page", fmt.Sprint(maxRegistryPageSize)) // fetch images in batches of 100 per page u.RawQuery = q.Encode() return u.String() }()