diff --git a/cmd/ubuntu.go b/cmd/ubuntu.go index e01a52a..01f8c3d 100644 --- a/cmd/ubuntu.go +++ b/cmd/ubuntu.go @@ -1,6 +1,7 @@ package cmd import ( + "errors" "time" "github.com/inconshreveable/log15" @@ -31,7 +32,7 @@ func fetchUbuntu(_ *cobra.Command, _ []string) (err error) { return xerrors.Errorf("Failed to SetLogger. err: %w", err) } - cveJSONs, err := fetcher.FetchUbuntuVulnList() + cveJSONs, count, err := fetcher.FetchUbuntuVulnList() if err != nil { return xerrors.Errorf("Failed to initialize vulnerability DB. err: %w", err) } @@ -40,7 +41,7 @@ func fetchUbuntu(_ *cobra.Command, _ []string) (err error) { log15.Info("Initialize Database") driver, err := db.NewDB(viper.GetString("dbtype"), viper.GetString("dbpath"), viper.GetBool("debug-sql"), db.Option{}) if err != nil { - if xerrors.Is(err, db.ErrDBLocked) { + if errors.Is(err, db.ErrDBLocked) { return xerrors.Errorf("Failed to open DB. Close DB connection before fetching. err: %w", err) } return xerrors.Errorf("Failed to open DB. err: %w", err) @@ -58,9 +59,9 @@ func fetchUbuntu(_ *cobra.Command, _ []string) (err error) { return xerrors.Errorf("Failed to upsert FetchMeta to DB. dbpath: %s, err: %w", viper.GetString("dbpath"), err) } - log15.Info("Fetched", "CVEs", len(cves)) + log15.Info("Fetched", "CVEs", count) log15.Info("Insert Ubuntu into DB", "db", driver.Name()) - if err := driver.InsertUbuntu(cves); err != nil { + if err := driver.InsertUbuntu(cves, count); err != nil { return xerrors.Errorf("Failed to insert. dbpath: %s, err: %w", viper.GetString("dbpath"), err) } diff --git a/db/db.go b/db/db.go index 21a18ac..10dc1f3 100644 --- a/db/db.go +++ b/db/db.go @@ -2,6 +2,7 @@ package db import ( "fmt" + "iter" "time" "golang.org/x/xerrors" @@ -48,7 +49,7 @@ type DB interface { InsertRedhat([]models.RedhatCVE) error InsertDebian([]models.DebianCVE) error - InsertUbuntu([]models.UbuntuCVE) error + InsertUbuntu(iter.Seq2[models.UbuntuCVE, error], int) error InsertMicrosoft([]models.MicrosoftCVE, []models.MicrosoftKBRelation) error InsertArch([]models.ArchADV) error } diff --git a/db/redis.go b/db/redis.go index e688c97..939e412 100644 --- a/db/redis.go +++ b/db/redis.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "io" + "iter" "maps" "os" "slices" @@ -1096,25 +1097,13 @@ func (r *RedisDriver) InsertDebian(cves []models.DebianCVE) error { } // InsertUbuntu : -func (r *RedisDriver) InsertUbuntu(cves []models.UbuntuCVE) (err error) { +func (r *RedisDriver) InsertUbuntu(cves iter.Seq2[models.UbuntuCVE, error], count int) (err error) { ctx := context.Background() batchSize := viper.GetInt("batch-size") if batchSize < 1 { return xerrors.Errorf("Failed to set batch-size. err: batch-size option is not set properly") } - advs := map[string][]string{} - for _, c := range cves { - for _, r := range c.References { - if strings.HasPrefix(r.Reference, "https://ubuntu.com/security/notices/USN-") { - advs[strings.TrimPrefix(r.Reference, "https://ubuntu.com/security/notices/")] = append(advs[strings.TrimPrefix(r.Reference, "https://ubuntu.com/security/notices/")], c.Candidate) - } - } - } - for k := range advs { - advs[k] = util.Unique(advs[k]) - } - // newDeps, oldDeps: {"CVEID": {"PKGNAME": {}}, "advisories": {"ADVISORYID": {}}} newDeps := map[string]map[string]struct{}{"advisories": {}} oldDepsStr, err := r.conn.HGet(ctx, depKey, ubuntuName).Result() @@ -1129,14 +1118,27 @@ func (r *RedisDriver) InsertUbuntu(cves []models.UbuntuCVE) (err error) { return xerrors.Errorf("Failed to unmarshal JSON. err: %w", err) } - log15.Info("Insert CVEs", "cves", len(cves)) - bar := pb.StartNew(len(cves)).SetWriter(func() io.Writer { + log15.Info("Insert CVEs", "cves", count) + bar := pb.StartNew(count).SetWriter(func() io.Writer { if viper.GetBool("log-json") { return io.Discard } return os.Stderr }()) - for chunk := range slices.Chunk(cves, batchSize) { + advs := map[string][]string{} + + for chunk, err := range util.Chunk(cves, batchSize) { + if err != nil { + return xerrors.Errorf("Failed to chunk cves. err: %w", err) + } + for _, c := range chunk { + for _, r := range c.References { + if strings.HasPrefix(r.Reference, "https://ubuntu.com/security/notices/USN-") { + advs[strings.TrimPrefix(r.Reference, "https://ubuntu.com/security/notices/")] = slices.Compact(append(advs[strings.TrimPrefix(r.Reference, "https://ubuntu.com/security/notices/")], c.Candidate)) + } + } + } + pipe := r.conn.Pipeline() cvekey := fmt.Sprintf(cveKeyFormat, ubuntuName) for _, cve := range chunk { @@ -1168,6 +1170,7 @@ func (r *RedisDriver) InsertUbuntu(cves []models.UbuntuCVE) (err error) { } bar.Add(len(chunk)) } + bar.SetCurrent(int64(count)) // The file number doesn't always match the number of CVEs that are actually inserted. bar.Finish() log15.Info("Insert Advisories", "advisories", len(advs)) diff --git a/db/ubuntu.go b/db/ubuntu.go index 4a51a0b..eeab927 100644 --- a/db/ubuntu.go +++ b/db/ubuntu.go @@ -4,8 +4,8 @@ import ( "errors" "fmt" "io" + "iter" "os" - "slices" "strings" "github.com/cheggaaa/pb/v3" @@ -78,16 +78,16 @@ func (r *RDBDriver) GetUbuntuMulti(cveIDs []string) (map[string]models.UbuntuCVE } // InsertUbuntu : -func (r *RDBDriver) InsertUbuntu(cves []models.UbuntuCVE) (err error) { - if err = r.deleteAndInsertUbuntu(cves); err != nil { +func (r *RDBDriver) InsertUbuntu(cves iter.Seq2[models.UbuntuCVE, error], count int) (err error) { + if err = r.deleteAndInsertUbuntu(cves, count); err != nil { return xerrors.Errorf("Failed to insert Ubuntu CVE data. err: %s", err) } return nil } -func (r *RDBDriver) deleteAndInsertUbuntu(cves []models.UbuntuCVE) (err error) { - bar := pb.StartNew(len(cves)).SetWriter(func() io.Writer { +func (r *RDBDriver) deleteAndInsertUbuntu(cves iter.Seq2[models.UbuntuCVE, error], count int) (err error) { + bar := pb.StartNew(count).SetWriter(func() io.Writer { if viper.GetBool("log-json") { return io.Discard } @@ -115,7 +115,11 @@ func (r *RDBDriver) deleteAndInsertUbuntu(cves []models.UbuntuCVE) (err error) { return xerrors.New("Failed to set batch-size. err: batch-size option is not set properly") } - for chunk := range slices.Chunk(cves, batchSize) { + for chunk, err := range util.Chunk(cves, batchSize) { + if err != nil { + return xerrors.Errorf("failed to insert Ubuntu CVE data. err: %w", err) + } + if err = tx.Create(chunk).Error; err != nil { return xerrors.Errorf("Failed to insert. err: %w", err) } diff --git a/fetcher/ubuntu.go b/fetcher/ubuntu.go index c7a8450..b874eac 100644 --- a/fetcher/ubuntu.go +++ b/fetcher/ubuntu.go @@ -4,6 +4,7 @@ import ( "encoding/json" "fmt" "io" + "iter" "path/filepath" "github.com/inconshreveable/log15" @@ -20,46 +21,63 @@ const ( ) // FetchUbuntuVulnList clones vuln-list and returns CVE JSONs -func FetchUbuntuVulnList() (entries []models.UbuntuCVEJSON, err error) { +func FetchUbuntuVulnList() (iter.Seq2[models.UbuntuCVEJSON, error], int, error) { // Clone vuln-list repository dir := filepath.Join(util.CacheDir(), "vuln-list") updatedFiles, err := git.CloneOrPull(ubuntuRepoURL, dir, ubuntuDir) if err != nil { - return nil, xerrors.Errorf("error in vulnsrc clone or pull: %w", err) + return nil, 0, xerrors.Errorf("error in vulnsrc clone or pull: %w", err) } // Only last_updated.json if len(updatedFiles) <= 1 { - return nil, nil + return nil, 0, nil } rootDir := filepath.Join(dir, ubuntuDir) targets, err := util.FilterTargets(ubuntuDir, updatedFiles) if err != nil { - return nil, xerrors.Errorf("failed to filter target files: %w", err) + return nil, 0, xerrors.Errorf("failed to filter target files: %w", err) } else if len(targets) == 0 { log15.Debug("Ubuntu: no update file") - return nil, nil + return nil, 0, nil } log15.Debug(fmt.Sprintf("Ubuntu updated files: %d", len(targets))) - err = util.FileWalk(rootDir, targets, func(r io.Reader, _ string) error { - content, err := io.ReadAll(r) - if err != nil { - return err - } + count, err := countUbuntuCVEs(rootDir, targets) + if err != nil { + return nil, 0, xerrors.Errorf("failed to count Ubuntu CVEs: %w", err) + } + + return func(yield func(models.UbuntuCVEJSON, error) bool) { + + err = util.FileWalk(rootDir, targets, func(r io.Reader, _ string) error { + content, err := io.ReadAll(r) + if err != nil { + return err + } - cve := models.UbuntuCVEJSON{} - if err = json.Unmarshal(content, &cve); err != nil { - return xerrors.Errorf("failed to decode Ubuntu JSON: %w", err) + cve := models.UbuntuCVEJSON{} + if err = json.Unmarshal(content, &cve); err != nil { + return xerrors.Errorf("failed to decode Ubuntu JSON: %w", err) + } + + if !yield(cve, nil) { + return err + } + return nil + }) + if err != nil && !yield(models.UbuntuCVEJSON{}, xerrors.Errorf("error in Ubuntu walk: %w", err)) { + return } + }, count, nil +} - entries = append(entries, cve) +func countUbuntuCVEs(rootDir string, targets map[string]struct{}) (int, error) { + count := 0 + err := util.FileWalk(rootDir, targets, func(r io.Reader, _ string) error { + count++ return nil }) - if err != nil { - return nil, xerrors.Errorf("error in Ubuntu walk: %w", err) - } - - return entries, nil + return count, err } diff --git a/models/ubuntu.go b/models/ubuntu.go index c158fd3..1951755 100644 --- a/models/ubuntu.go +++ b/models/ubuntu.go @@ -1,6 +1,7 @@ package models import ( + "iter" "strings" "time" ) @@ -103,75 +104,80 @@ type UbuntuUpstreamLink struct { } // ConvertUbuntu : -func ConvertUbuntu(cveJSONs []UbuntuCVEJSON) (cves []UbuntuCVE) { - for _, cve := range cveJSONs { - if strings.Contains(cve.Description, "** REJECT **") { - continue - } +func ConvertUbuntu(cveJSONs iter.Seq2[UbuntuCVEJSON, error]) iter.Seq2[UbuntuCVE, error] { + return func(yield func(UbuntuCVE, error) bool) { + for cve, err := range cveJSONs { + if err != nil && !yield(UbuntuCVE{}, err) { + return + } + if strings.Contains(cve.Description, "** REJECT **") { + continue + } - if cve.PublicDateAtUSN == time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC) { - cve.PublicDateAtUSN = time.Date(1000, time.January, 1, 0, 0, 0, 0, time.UTC) - } + if cve.PublicDateAtUSN == time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC) { + cve.PublicDateAtUSN = time.Date(1000, time.January, 1, 0, 0, 0, 0, time.UTC) + } - if cve.CRD == time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC) { - cve.CRD = time.Date(1000, time.January, 1, 0, 0, 0, 0, time.UTC) - } + if cve.CRD == time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC) { + cve.CRD = time.Date(1000, time.January, 1, 0, 0, 0, 0, time.UTC) + } - if cve.PublicDate == time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC) { - cve.PublicDate = time.Date(1000, time.January, 1, 0, 0, 0, 0, time.UTC) - } + if cve.PublicDate == time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC) { + cve.PublicDate = time.Date(1000, time.January, 1, 0, 0, 0, 0, time.UTC) + } - references := []UbuntuReference{} - for _, r := range cve.References { - references = append(references, UbuntuReference{Reference: r}) - } + references := []UbuntuReference{} + for _, r := range cve.References { + references = append(references, UbuntuReference{Reference: r}) + } - notes := []UbuntuNote{} - for _, n := range cve.Notes { - notes = append(notes, UbuntuNote{Note: n}) - } + notes := []UbuntuNote{} + for _, n := range cve.Notes { + notes = append(notes, UbuntuNote{Note: n}) + } - bugs := []UbuntuBug{} - for _, b := range cve.Bugs { - bugs = append(bugs, UbuntuBug{Bug: b}) - } + bugs := []UbuntuBug{} + for _, b := range cve.Bugs { + bugs = append(bugs, UbuntuBug{Bug: b}) + } - patches := []UbuntuPatch{} - for pkgName, p := range cve.Patches { - var releasePatch []UbuntuReleasePatch - for release, patch := range p { - releasePatch = append(releasePatch, UbuntuReleasePatch{ReleaseName: release, Status: patch.Status, Note: patch.Note}) + patches := []UbuntuPatch{} + for pkgName, p := range cve.Patches { + var releasePatch []UbuntuReleasePatch + for release, patch := range p { + releasePatch = append(releasePatch, UbuntuReleasePatch{ReleaseName: release, Status: patch.Status, Note: patch.Note}) + } + patches = append(patches, UbuntuPatch{PackageName: pkgName, ReleasePatches: releasePatch}) } - patches = append(patches, UbuntuPatch{PackageName: pkgName, ReleasePatches: releasePatch}) - } - upstreams := []UbuntuUpstream{} - for pkgName, u := range cve.UpstreamLinks { - links := []UbuntuUpstreamLink{} - for _, link := range u { - links = append(links, UbuntuUpstreamLink{Link: link}) + upstreams := []UbuntuUpstream{} + for pkgName, u := range cve.UpstreamLinks { + links := []UbuntuUpstreamLink{} + for _, link := range u { + links = append(links, UbuntuUpstreamLink{Link: link}) + } + upstreams = append(upstreams, UbuntuUpstream{PackageName: pkgName, UpstreamLinks: links}) } - upstreams = append(upstreams, UbuntuUpstream{PackageName: pkgName, UpstreamLinks: links}) - } - c := UbuntuCVE{ - PublicDateAtUSN: cve.PublicDateAtUSN, - CRD: cve.CRD, - Candidate: cve.Candidate, - PublicDate: cve.PublicDate, - References: references, - Description: cve.Description, - UbuntuDescription: cve.UbuntuDescription, - Notes: notes, - Bugs: bugs, - Priority: cve.Priority, - DiscoveredBy: cve.DiscoveredBy, - AssignedTo: cve.AssignedTo, - Patches: patches, - Upstreams: upstreams, + c := UbuntuCVE{ + PublicDateAtUSN: cve.PublicDateAtUSN, + CRD: cve.CRD, + Candidate: cve.Candidate, + PublicDate: cve.PublicDate, + References: references, + Description: cve.Description, + UbuntuDescription: cve.UbuntuDescription, + Notes: notes, + Bugs: bugs, + Priority: cve.Priority, + DiscoveredBy: cve.DiscoveredBy, + AssignedTo: cve.AssignedTo, + Patches: patches, + Upstreams: upstreams, + } + if !yield(c, nil) { + return + } } - cves = append(cves, c) } - - return cves } diff --git a/util/util.go b/util/util.go index 68936fa..3ed08ae 100644 --- a/util/util.go +++ b/util/util.go @@ -4,6 +4,7 @@ import ( "bytes" "fmt" "io" + "iter" "maps" "os" "os/exec" @@ -344,3 +345,36 @@ func (p *ProgressBar) Finish() { } p.client.Finish() } + +// Chunk chunks the sequence into n-sized chunks +// Note: slices.Chunk doesn't support iterators as of Go 1.23. +// https://pkg.go.dev/slices#Chunk +func Chunk[T any](s iter.Seq2[T, error], n int) iter.Seq2[[]T, error] { + return func(yield func([]T, error) bool) { + if n < 1 { + panic("cannot be less than 1") + } + + chunk := make([]T, 0, n) + for t, err := range s { + if err != nil && !yield(nil, err) { + return + } + chunk = append(chunk, t) + if len(chunk) != n { + continue + } + + if !yield(chunk, nil) { + return + } + chunk = chunk[:0] + } + + if len(chunk) > 0 { + if !yield(chunk, nil) { + return + } + } + } +}