Skip to content

Commit

Permalink
gazctl shards prune: continue pruning after failure to delete fragments
Browse files Browse the repository at this point in the history
This allows recovery log pruning to continue after encountering an error
while removing a fragment. We now operate Gazette clusters that use a variety of
different storage buckets, and it seems unavoidable that some of them might
have misconfigured permissions or return an error for some other reason. In
that case, we now log a warning and continue the prune operation. gazctl
will still exit non-zero if it has encountered any errors removing fragments,
to ensure it never fails silently.
  • Loading branch information
psFried committed Feb 6, 2024
1 parent 6a67440 commit 338b339
Showing 1 changed file with 19 additions and 5 deletions.
24 changes: 19 additions & 5 deletions cmd/gazctl/gazctlcmd/shards_prune.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,12 +126,21 @@ func (cmd *cmdShardsPrune) Execute([]string) error {
"end": spec.End,
}).Debug("pruning fragment")

metrics.fragmentsPruned++
metrics.bytesPruned += spec.ContentLength()
prunedFragments = append(prunedFragments, spec)

var removed = true
if !cmd.DryRun {
mbp.Must(fragment.Remove(ctx, spec), "error removing fragment", "path", spec.ContentPath())
if err := fragment.Remove(ctx, spec); err != nil {
removed = false
metrics.failedToRemove++
log.WithFields(log.Fields{
"fragment": spec,
"error": err,
}).Warn("failed to remove fragment (skipping)")
}
}
if removed {
metrics.fragmentsPruned++
metrics.bytesPruned += spec.ContentLength()
prunedFragments = append(prunedFragments, spec)
}
}
}
Expand All @@ -147,6 +156,9 @@ func (cmd *cmdShardsPrune) Execute([]string) error {
}
logShardsPruneMetrics(metrics, "", "finished pruning logs for all shards")

if metrics.failedToRemove > 0 {
log.WithField("failures", metrics.failedToRemove).Fatal("failed to remove fragments")
}
return nil
}

Expand Down Expand Up @@ -203,6 +215,7 @@ type shardsPruneMetrics struct {
bytesTotal int64
bytesPruned int64
skippedJournals int64
failedToRemove int64
}

func logShardsPruneMetrics(m shardsPruneMetrics, journal, message string) {
Expand All @@ -215,6 +228,7 @@ func logShardsPruneMetrics(m shardsPruneMetrics, journal, message string) {
"bytesPruned": m.bytesPruned,
"bytesKept": m.bytesTotal - m.bytesPruned,
"skippedJournals": m.skippedJournals,
"failedToRemove": m.failedToRemove,
}
if journal != "" {
fields["journal"] = journal
Expand Down

0 comments on commit 338b339

Please sign in to comment.