Skip to content

Commit 9f8af05

Browse files
authored
feat: add local cache to sbom enrichment (#119)
1 parent 2627324 commit 9f8af05

File tree

4 files changed

+328
-7
lines changed

4 files changed

+328
-7
lines changed

lib/ecosystems/cache.go

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
/*
2+
* © 2023 Snyk Limited All rights reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package ecosystems
18+
19+
import (
20+
"sync"
21+
22+
"github.com/package-url/packageurl-go"
23+
24+
"github.com/snyk/parlay/ecosystems/packages"
25+
)
26+
27+
type Cache interface {
28+
GetPackageData(purl packageurl.PackageURL) (*packages.GetRegistryPackageResponse, error)
29+
GetPackageVersionData(purl packageurl.PackageURL) (*packages.GetRegistryPackageVersionResponse, error)
30+
}
31+
32+
type InMemoryCache struct {
33+
packageCache map[string]*packages.GetRegistryPackageResponse
34+
packageVersionCache map[string]*packages.GetRegistryPackageVersionResponse
35+
mu sync.RWMutex
36+
}
37+
38+
func NewInMemoryCache() *InMemoryCache {
39+
return &InMemoryCache{
40+
packageCache: make(map[string]*packages.GetRegistryPackageResponse),
41+
packageVersionCache: make(map[string]*packages.GetRegistryPackageVersionResponse),
42+
}
43+
}
44+
45+
func (c *InMemoryCache) GetPackageData(purl packageurl.PackageURL) (*packages.GetRegistryPackageResponse, error) {
46+
key := purl.ToString()
47+
48+
c.mu.RLock()
49+
if cached, exists := c.packageCache[key]; exists {
50+
c.mu.RUnlock()
51+
return cached, nil
52+
}
53+
c.mu.RUnlock()
54+
55+
response, err := GetPackageData(purl)
56+
if err != nil {
57+
return nil, err
58+
}
59+
60+
c.mu.Lock()
61+
c.packageCache[key] = response
62+
c.mu.Unlock()
63+
64+
return response, nil
65+
}
66+
67+
func (c *InMemoryCache) GetPackageVersionData(purl packageurl.PackageURL) (*packages.GetRegistryPackageVersionResponse, error) {
68+
key := purl.ToString()
69+
70+
c.mu.RLock()
71+
if cached, exists := c.packageVersionCache[key]; exists {
72+
c.mu.RUnlock()
73+
return cached, nil
74+
}
75+
c.mu.RUnlock()
76+
77+
response, err := GetPackageVersionData(purl)
78+
if err != nil {
79+
return nil, err
80+
}
81+
82+
c.mu.Lock()
83+
c.packageVersionCache[key] = response
84+
c.mu.Unlock()
85+
86+
return response, nil
87+
}
88+
89+
func (c *InMemoryCache) GetCacheStats() (int, int) {
90+
c.mu.RLock()
91+
defer c.mu.RUnlock()
92+
return len(c.packageCache), len(c.packageVersionCache)
93+
}

lib/ecosystems/cache_test.go

Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
/*
2+
* © 2023 Snyk Limited All rights reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package ecosystems
18+
19+
import (
20+
"sync"
21+
"testing"
22+
23+
"github.com/jarcoal/httpmock"
24+
"github.com/package-url/packageurl-go"
25+
"github.com/stretchr/testify/assert"
26+
"github.com/stretchr/testify/require"
27+
)
28+
29+
func TestInMemoryCache_GetPackageData(t *testing.T) {
30+
httpmock.Activate()
31+
defer httpmock.DeactivateAndReset()
32+
33+
mockResponse := `{"name": "test-package", "description": "Test package"}`
34+
httpmock.RegisterResponder(
35+
"GET",
36+
`=~^https://packages.ecosyste.ms/api/v1/registries`,
37+
httpmock.NewStringResponder(200, mockResponse),
38+
)
39+
40+
cache := NewInMemoryCache()
41+
purl, err := packageurl.FromString("pkg:npm/[email protected]")
42+
require.NoError(t, err)
43+
44+
resp1, err := cache.GetPackageData(purl)
45+
assert.NoError(t, err)
46+
assert.NotNil(t, resp1)
47+
48+
resp2, err := cache.GetPackageData(purl)
49+
assert.NoError(t, err)
50+
assert.NotNil(t, resp2)
51+
assert.Equal(t, resp1, resp2)
52+
53+
callCount := httpmock.GetTotalCallCount()
54+
assert.Equal(t, 1, callCount)
55+
56+
pkgCacheSize, versionCacheSize := cache.GetCacheStats()
57+
assert.Equal(t, 1, pkgCacheSize)
58+
assert.Equal(t, 0, versionCacheSize)
59+
}
60+
61+
func TestInMemoryCache_GetPackageVersionData(t *testing.T) {
62+
httpmock.Activate()
63+
defer httpmock.DeactivateAndReset()
64+
65+
mockResponse := `{"number": "1.0.0", "licenses": "MIT"}`
66+
httpmock.RegisterResponder(
67+
"GET",
68+
`=~^https://packages.ecosyste.ms/api/v1/registries`,
69+
httpmock.NewStringResponder(200, mockResponse),
70+
)
71+
72+
cache := NewInMemoryCache()
73+
purl, err := packageurl.FromString("pkg:npm/[email protected]")
74+
require.NoError(t, err)
75+
76+
resp1, err := cache.GetPackageVersionData(purl)
77+
assert.NoError(t, err)
78+
assert.NotNil(t, resp1)
79+
80+
resp2, err := cache.GetPackageVersionData(purl)
81+
assert.NoError(t, err)
82+
assert.NotNil(t, resp2)
83+
assert.Equal(t, resp1, resp2)
84+
85+
callCount := httpmock.GetTotalCallCount()
86+
assert.Equal(t, 1, callCount)
87+
88+
pkgCacheSize, versionCacheSize := cache.GetCacheStats()
89+
assert.Equal(t, 0, pkgCacheSize)
90+
assert.Equal(t, 1, versionCacheSize)
91+
}
92+
93+
func TestInMemoryCache_DifferentPackages(t *testing.T) {
94+
httpmock.Activate()
95+
defer httpmock.DeactivateAndReset()
96+
97+
httpmock.RegisterResponder(
98+
"GET",
99+
`=~^https://packages.ecosyste.ms/api/v1/registries`,
100+
httpmock.NewStringResponder(200, `{}`),
101+
)
102+
103+
cache := NewInMemoryCache()
104+
purl1, err := packageurl.FromString("pkg:npm/[email protected]")
105+
require.NoError(t, err)
106+
purl2, err := packageurl.FromString("pkg:npm/[email protected]")
107+
require.NoError(t, err)
108+
109+
_, err = cache.GetPackageData(purl1)
110+
assert.NoError(t, err)
111+
_, err = cache.GetPackageData(purl2)
112+
assert.NoError(t, err)
113+
114+
callCount := httpmock.GetTotalCallCount()
115+
assert.Equal(t, 2, callCount)
116+
117+
pkgCacheSize, versionCacheSize := cache.GetCacheStats()
118+
assert.Equal(t, 2, pkgCacheSize)
119+
assert.Equal(t, 0, versionCacheSize)
120+
}
121+
122+
func TestInMemoryCache_SamePackageDifferentVersions(t *testing.T) {
123+
httpmock.Activate()
124+
defer httpmock.DeactivateAndReset()
125+
126+
httpmock.RegisterResponder(
127+
"GET",
128+
`=~^https://packages.ecosyste.ms/api/v1/registries`,
129+
httpmock.NewStringResponder(200, `{}`),
130+
)
131+
132+
cache := NewInMemoryCache()
133+
purl1, err := packageurl.FromString("pkg:npm/[email protected]")
134+
require.NoError(t, err)
135+
purl2, err := packageurl.FromString("pkg:npm/[email protected]")
136+
require.NoError(t, err)
137+
138+
_, err = cache.GetPackageData(purl1)
139+
assert.NoError(t, err)
140+
_, err = cache.GetPackageData(purl2)
141+
assert.NoError(t, err)
142+
143+
// Different versions = different cache entries
144+
callCount := httpmock.GetTotalCallCount()
145+
assert.Equal(t, 2, callCount)
146+
147+
pkgCacheSize, versionCacheSize := cache.GetCacheStats()
148+
assert.Equal(t, 2, pkgCacheSize)
149+
assert.Equal(t, 0, versionCacheSize)
150+
}
151+
152+
func TestInMemoryCache_APIError(t *testing.T) {
153+
httpmock.Activate()
154+
defer httpmock.DeactivateAndReset()
155+
156+
// HTTP client returns a successful response even with 500 status
157+
// So we need to test that the client properly handles this case
158+
httpmock.RegisterResponder(
159+
"GET",
160+
`=~^https://packages.ecosyste.ms/api/v1/registries`,
161+
httpmock.NewStringResponder(500, `{"error": "internal server error"}`),
162+
)
163+
164+
cache := NewInMemoryCache()
165+
purl, err := packageurl.FromString("pkg:npm/[email protected]")
166+
require.NoError(t, err)
167+
168+
// HTTP client doesn't treat 500 as error
169+
resp1, err := cache.GetPackageData(purl)
170+
assert.NoError(t, err)
171+
assert.Equal(t, 500, resp1.StatusCode())
172+
173+
resp2, err := cache.GetPackageData(purl)
174+
assert.NoError(t, err)
175+
assert.Equal(t, 500, resp2.StatusCode())
176+
assert.Equal(t, resp1, resp2)
177+
178+
// Second call used cache
179+
callCount := httpmock.GetTotalCallCount()
180+
assert.Equal(t, 1, callCount)
181+
182+
pkgCacheSize, versionCacheSize := cache.GetCacheStats()
183+
assert.Equal(t, 1, pkgCacheSize)
184+
assert.Equal(t, 0, versionCacheSize)
185+
}
186+
187+
func TestInMemoryCache_ConcurrentAccess(t *testing.T) {
188+
httpmock.Activate()
189+
defer httpmock.DeactivateAndReset()
190+
191+
httpmock.RegisterResponder(
192+
"GET",
193+
`=~^https://packages.ecosyste.ms/api/v1/registries`,
194+
httpmock.NewStringResponder(200, `{}`),
195+
)
196+
197+
cache := NewInMemoryCache()
198+
purl, err := packageurl.FromString("pkg:npm/[email protected]")
199+
require.NoError(t, err)
200+
201+
var wg sync.WaitGroup
202+
numGoroutines := 100
203+
204+
for i := 0; i < numGoroutines; i++ {
205+
wg.Add(1)
206+
go func() {
207+
defer wg.Done()
208+
_, err := cache.GetPackageData(purl)
209+
assert.NoError(t, err)
210+
}()
211+
}
212+
213+
wg.Wait()
214+
215+
// Should only have one entry despite concurrent access
216+
pkgCacheSize, versionCacheSize := cache.GetCacheStats()
217+
assert.Equal(t, 1, pkgCacheSize)
218+
assert.Equal(t, 0, versionCacheSize)
219+
220+
// API should have been called at least once, but should be much less than 100 due to caching
221+
// Can't guarantee exactly 1 call due to race conditions, but should be significantly less than numGoroutines
222+
callCount := httpmock.GetTotalCallCount()
223+
assert.True(t, callCount >= 1 && callCount < numGoroutines)
224+
}

lib/ecosystems/enrich_cyclonedx.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,10 @@ import (
3030
"github.com/snyk/parlay/internal/utils"
3131
)
3232

33-
type cdxPackageEnricher = func(*cdx.Component, *packages.Package)
34-
type cdxPackageVersionEnricher = func(*cdx.Component, *packages.VersionWithDependencies, *packages.Package)
33+
type (
34+
cdxPackageEnricher = func(*cdx.Component, *packages.Package)
35+
cdxPackageVersionEnricher = func(*cdx.Component, *packages.VersionWithDependencies, *packages.Package)
36+
)
3537

3638
var cdxPackageEnrichers = []cdxPackageEnricher{
3739
enrichCDXDescription,
@@ -195,6 +197,7 @@ func enrichCDXTopics(comp *cdx.Component, data *packages.Package) {
195197

196198
func enrichCDX(bom *cdx.BOM, logger *zerolog.Logger) {
197199
wg := sizedwaitgroup.New(20)
200+
cache := NewInMemoryCache()
198201

199202
comps := utils.DiscoverCDXComponents(bom)
200203
logger.Debug().Msgf("Detected %d packages", len(comps))
@@ -213,7 +216,7 @@ func enrichCDX(bom *cdx.BOM, logger *zerolog.Logger) {
213216
return
214217
}
215218

216-
packageResp, err := GetPackageData(purl)
219+
packageResp, err := cache.GetPackageData(purl)
217220
if err != nil {
218221
l.Debug().
219222
Err(err).
@@ -232,7 +235,7 @@ func enrichCDX(bom *cdx.BOM, logger *zerolog.Logger) {
232235
enrichFunc(comp, packageResp.JSON200)
233236
}
234237

235-
packageVersionResp, err := GetPackageVersionData(purl)
238+
packageVersionResp, err := cache.GetPackageVersionData(purl)
236239
if err != nil {
237240
l.Debug().
238241
Err(err).
@@ -250,7 +253,6 @@ func enrichCDX(bom *cdx.BOM, logger *zerolog.Logger) {
250253
for _, enrichFunc := range cdxPackageVersionEnrichers {
251254
enrichFunc(comp, packageVersionResp.JSON200, packageResp.JSON200)
252255
}
253-
254256
}(comps[i])
255257
}
256258

lib/ecosystems/enrich_spdx.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,15 @@ func enrichSPDX(bom *spdx.Document, logger *zerolog.Logger) {
3535

3636
logger.Debug().Msgf("Detected %d packages", len(packages))
3737

38+
cache := NewInMemoryCache()
39+
3840
for _, pkg := range packages {
3941
purl, err := extractPurl(pkg)
4042
if err != nil {
4143
continue
4244
}
4345

44-
packageResp, err := GetPackageData(*purl)
46+
packageResp, err := cache.GetPackageData(*purl)
4547
if err != nil {
4648
continue
4749
}
@@ -55,7 +57,7 @@ func enrichSPDX(bom *spdx.Document, logger *zerolog.Logger) {
5557
enrichSPDXHomepage(pkg, pkgData)
5658
enrichSPDXSupplier(pkg, pkgData)
5759

58-
packageVersionResp, err := GetPackageVersionData(*purl)
60+
packageVersionResp, err := cache.GetPackageVersionData(*purl)
5961
if err != nil {
6062
continue
6163
}

0 commit comments

Comments
 (0)