Skip to content

Commit

Permalink
Merge pull request #57 from puerco/document-query
Browse files Browse the repository at this point in the history
Make the document queryable (and ensure unique names)
  • Loading branch information
k8s-ci-robot authored Feb 2, 2022
2 parents 58c15e1 + 91d8425 commit 1a1dee6
Show file tree
Hide file tree
Showing 8 changed files with 244 additions and 3 deletions.
10 changes: 9 additions & 1 deletion pkg/spdx/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ func (builder *defaultDocBuilderImpl) GenerateDoc(
if err != nil {
return nil, errors.Wrap(err, "generating package from directory")
}
doc.ensureUniqueElementID(pkg)

if err := doc.AddPackage(pkg); err != nil {
return nil, errors.Wrap(err, "adding directory package to document")
Expand All @@ -204,18 +205,22 @@ func (builder *defaultDocBuilderImpl) GenerateDoc(
if err != nil {
return nil, errors.Wrapf(err, "generating SPDX package from image ref %s", i)
}
doc.ensureUniqueElementID(p)
doc.ensureUniquePeerIDs(p.GetRelationships())
if err := doc.AddPackage(p); err != nil {
return nil, errors.Wrap(err, "adding package to document")
}
}

// Process OCI image archives
for _, tb := range genopts.Tarballs {
logrus.Infof("Processing tarball %s", tb)
logrus.Infof("Processing image archive %s", tb)
p, err := spdx.PackageFromImageTarball(tb)
if err != nil {
return nil, errors.Wrap(err, "generating tarball package")
}
doc.ensureUniqueElementID(p)
doc.ensureUniquePeerIDs(p.GetRelationships())
if err := doc.AddPackage(p); err != nil {
return nil, errors.Wrap(err, "adding package to document")
}
Expand All @@ -228,6 +233,8 @@ func (builder *defaultDocBuilderImpl) GenerateDoc(
if err != nil {
return nil, errors.Wrap(err, "creating spdx package from archive")
}
doc.ensureUniqueElementID(p)
doc.ensureUniquePeerIDs(p.GetRelationships())
if err := doc.AddPackage(p); err != nil {
return nil, errors.Wrap(err, "adding package to document")
}
Expand All @@ -240,6 +247,7 @@ func (builder *defaultDocBuilderImpl) GenerateDoc(
if err != nil {
return nil, errors.Wrap(err, "adding file")
}
doc.ensureUniqueElementID(f)
if err := doc.AddFile(f); err != nil {
return nil, errors.Wrap(err, "adding file to document")
}
Expand Down
78 changes: 76 additions & 2 deletions pkg/spdx/document.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,12 +175,13 @@ func (d *Document) AddPackage(pkg *Package) error {

if pkg.SPDXID() == "" {
pkg.BuildID(pkg.Name)
d.ensureUniqueElementID(pkg)
}
if pkg.SPDXID() == "" {
return errors.New("package id is needed to add a new package")
return errors.New("package ID is needed to add a new package")
}
if _, ok := d.Packages[pkg.SPDXID()]; ok {
return errors.New("a package named " + pkg.SPDXID() + " already exists in the document")
return errors.Errorf("a package with ID %s already exists in the document", pkg.SPDXID())
}

d.Packages[pkg.SPDXID()] = pkg
Expand Down Expand Up @@ -278,6 +279,7 @@ func (d *Document) AddFile(file *File) error {
}
file.ID = "SPDXRef-File-" + fmt.Sprintf("%x", h.Sum(nil))
}
d.ensureUniqueElementID(file)
d.Files[file.ID] = file
return nil
}
Expand Down Expand Up @@ -398,3 +400,75 @@ func (d *Document) WriteProvenanceStatement(opts *ProvenanceOptions, path string
"writing sbom as provenance statement",
)
}

// ensureUniquePackageID takes a string and checks if
// there is another string with the same name in the document.
// If there is one, it will append a digit until a unique name
// is found.
func (d *Document) ensureUniqueElementID(o Object) {
newID := o.SPDXID()
i := 0
for {
// Check if there us already an element with the same ID
if el := d.GetElementByID(newID); el == nil {
if o.SPDXID() != newID {
logrus.Infof(
"Element name changed from %s to %s to ensure it is unique",
o.SPDXID(), newID,
)
}
o.SetSPDXID(newID)
break
}
i++
newID = fmt.Sprintf("%s-%04d", o.SPDXID(), i)
}
}

// ensureUniquePeerIDs gets a relationship collection and ensures all peers
// have unique IDs
func (d *Document) ensureUniquePeerIDs(rels *[]*Relationship) {
// First, ensure peer names are unique among themselves
seen := map[string]struct{}{}
for _, rel := range *rels {
if rel.Peer == nil || rel.Peer.SPDXID() == "" {
continue
}
testName := rel.Peer.SPDXID()
i := 0
for {
if _, ok := seen[testName]; !ok {
rel.Peer.SetSPDXID(testName)
seen[testName] = struct{}{}
break
}
i++
testName = fmt.Sprintf("%s-%04d", rel.Peer.SPDXID(), i)
}
}

// And then check against the document
for _, rel := range *rels {
if rel.Peer == nil {
continue
}
d.ensureUniqueElementID(rel.Peer)
}
}

// GetPackageByID queries the packages to search for a specific entity by name
// note that this method returns a copy of the entity if found.
func (d *Document) GetElementByID(id string) Object {
seen := map[string]struct{}{}
for _, p := range d.Packages {
if sub := recursiveSearch(id, p, &seen); sub != nil {
return sub
}
}
for _, f := range d.Files {
if sub := recursiveSearch(id, f, &seen); sub != nil {
return sub
}
}
return nil
}
51 changes: 51 additions & 0 deletions pkg/spdx/document_unit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,3 +118,54 @@ func compareSubjects(t *testing.T, statement1, statement2 *provenance.Statement)
}
}
}

func TestEnsureUniqueElementID(t *testing.T) {
doc := NewDocument()
name := "same-name"
for i := 0; i < 3; i++ {
subp := NewPackage()
subp.SetSPDXID(name)

// Passing the subpackages through nsureUniquePackageID
// should rename the last two, but not the first one
doc.ensureUniqueElementID(subp)
require.NoError(t, doc.AddPackage(subp))

if i == 0 {
require.Equal(t, name, subp.SPDXID())
} else {
require.NotEqual(t, name, subp.SPDXID())
}
}
}

func TestEnsureUniquePeerIDs(t *testing.T) {
doc := NewDocument()
name := "same-name"

// Add one node with the name
namePkg := NewPackage()
namePkg.SetSPDXID(name)

// Build a package with 3 peers all with the same name
p := NewPackage()
p.SetSPDXID("parentNode")
for i := 0; i < 3; i++ {
subp := NewPackage()
subp.SetSPDXID(name)
require.NoError(t, p.AddPackage(subp))
}

// Pass the package with its peers through ensureUniquePeerIDs
doc.ensureUniquePeerIDs(p.GetRelationships())

// Now, they should all be different
seenNames := map[string]struct{}{}
rels := p.GetRelationships()
for _, rel := range *rels {
logrus.Info("Checking " + rel.Peer.SPDXID())
_, ok := seenNames[rel.Peer.SPDXID()]
require.False(t, ok, rel.Peer.SPDXID()+" should be unique")
seenNames[rel.Peer.SPDXID()] = struct{}{}
}
}
10 changes: 10 additions & 0 deletions pkg/spdx/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,3 +190,13 @@ func getFileContentType(path string) (string, error) {
contentType := http.DetectContentType(buffer)
return contentType, nil
}

// GetElementByID search the file and its peers looking for the
// specified SPDX id. If found, the function returns a copy of
// the object identified by the SPDX-ID provided
func (f *File) GetElementByID(id string) Object {
if f.SPDXID() == id {
return f
}
return recursiveSearch(id, f, &map[string]struct{}{})
}
10 changes: 10 additions & 0 deletions pkg/spdx/object.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ import (
// objects. Currently this includes files and packages.
type Object interface {
SPDXID() string
SetSPDXID(string)
ReadSourceFile(string) error
Render() (string, error)
BuildID(seeds ...string)
Expand All @@ -48,6 +49,7 @@ type Object interface {
GetRelationships() *[]*Relationship
ToProvenanceSubject() *intoto.Subject
getProvenanceSubjects(opts *ProvenanceOptions, seen *map[string]struct{}) []intoto.Subject
GetElementByID(string) Object
}

type Entity struct {
Expand Down Expand Up @@ -77,6 +79,11 @@ func (e *Entity) SPDXID() string {
return e.ID
}

// SPDXID returns the SPDX reference string for the object
func (e *Entity) SetSPDXID(id string) {
e.ID = id
}

// BuildID sets the file ID, optionally from a series of strings
func (e *Entity) BuildID(seeds ...string) {
if len(seeds) <= 1 {
Expand Down Expand Up @@ -269,3 +276,6 @@ mloop:
}
return ret
}

// GetElementByID nil function to be overridden by package and file
func (e *Entity) GetElementByID(string) Object { return nil }
11 changes: 11 additions & 0 deletions pkg/spdx/package.go
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,8 @@ func (p *Package) Draw(builder *strings.Builder, o *DrawingOptions, depth int, s
}
}

// ReadSourceFile reads a file from the filesystem and assigns its properties
// to the package metadata
func (p *Package) ReadSourceFile(path string) error {
if err := p.Entity.ReadSourceFile(path); err != nil {
return err
Expand All @@ -397,3 +399,12 @@ func (p *Package) ReadSourceFile(path string) error {
}
return nil
}

// GetElementByID search the package and its peers looking for the specified SPDX
// id. If found, the function returns a copy of the object
func (p *Package) GetElementByID(id string) Object {
if p.SPDXID() == id {
return p
}
return recursiveSearch(id, p, &map[string]struct{}{})
}
27 changes: 27 additions & 0 deletions pkg/spdx/spdx.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,3 +246,30 @@ func Banner() string {
}
return string(d)
}

// recursiveSearch is a function that recursively searches an object's peers
// to find the specified SPDX ID. If found, returns a copy of the object.
// nolint:gocritic // seen is a pointer recursively populated
func recursiveSearch(id string, o Object, seen *map[string]struct{}) Object {
if o.SPDXID() == id {
return o
}
for _, rel := range *o.GetRelationships() {
if rel.Peer == nil {
continue
}

if _, ok := (*seen)[o.SPDXID()]; ok {
continue
}

if rel.Peer.SPDXID() == id {
return rel.Peer
}

if peerObject := recursiveSearch(id, rel.Peer, seen); peerObject != nil {
return peerObject
}
}
return nil
}
50 changes: 50 additions & 0 deletions pkg/spdx/spdx_unit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -402,3 +402,53 @@ func TestIgnorePatterns(t *testing.T) {
require.NoError(t, err)
require.Len(t, p, 4)
}

func TestRecursiveSearch(t *testing.T) {
p := NewPackage()
p.SetSPDXID("p-top")

// Lets nest 3 packages
packages := []*Package{}
for i := 0; i < 3; i++ {
subp := NewPackage()
subp.SetSPDXID(fmt.Sprintf("subpackage-%d", i))
packages = append(packages, subp)
}
for i, sp := range packages {
if i > 0 {
require.NoError(t, packages[i-1].AddPackage(sp))
}
}
require.NoError(t, p.AddPackage(packages[0]))

// This functions searches 3 packages with the same prefix:
checkSubPackages := func(p *Package, radix string) {
for i := 0; i < 3; i++ {
require.NotNil(t, p.GetElementByID(fmt.Sprintf("%s-%d", radix, i)),
fmt.Sprintf("searching for %s", radix),
)
}
}

checkSubPackages(p, "subpackage")
// Non existent packages should not return an element
require.Nil(t, p.GetElementByID("subpackage-10000000"))

// Now bifurcating the document structure adding dependencies should
// not alter those conditions.

// Lets add 3 dependencies to one of the nested packages
for i := 0; i < 3; i++ {
subp := NewPackage()
subp.SetSPDXID(fmt.Sprintf("dep-%d", i))
require.NoError(t, p.GetElementByID("subpackage-1").(*Package).AddPackage(subp))
}

// Same tests should still pass:
checkSubPackages(p, "subpackage")
// Non existent packages should not return an element
require.Nil(t, p.GetElementByID("subpackage-10000000"))

// But also dependencies should be found
checkSubPackages(p, "dep")
}

0 comments on commit 1a1dee6

Please sign in to comment.