Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pkg/email/lore: extract patch series #5656

Merged
merged 1 commit into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 163 additions & 6 deletions pkg/email/lore/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,17 @@
package lore

import (
"fmt"
"regexp"
"sort"
"strconv"
"strings"

"github.com/google/syzkaller/dashboard/dashapi"
"github.com/google/syzkaller/pkg/email"
)

// Thread is a generic representation of a single discussion in the mailing list.
type Thread struct {
Subject string
MessageID string
Expand All @@ -20,9 +23,28 @@ type Thread struct {
Messages []*email.Email
}

// Series represents a single patch series sent over email.
type Series struct {
tarasmadan marked this conversation as resolved.
Show resolved Hide resolved
Subject string
MessageID string
Version int
Corrupted string // If non-empty, contains a reason why the series better be ignored.
Patches []Patch
}

// Patch is one patch email of a Series.
type Patch struct {
// Seq is the patch's position within the series; PatchSeries takes it from the
// "Seq/Total" part of the email subject and uses 1 when the subject has none.
Seq int
// The embedded message is the email that carried this patch.
*email.Email
}

// Threads splits a list of emails into the individual discussion threads they form.
func Threads(emails []*email.Email) []*Thread {
	// A zero limit tells listThreads to follow replies at any depth.
	const noDepthLimit = 0
	return listThreads(emails, noDepthLimit)
}

func listThreads(emails []*email.Email, maxDepth int) []*Thread {
ctx := &parseCtx{
maxDepth: maxDepth,
messages: map[string]*email.Email{},
next: map[*email.Email][]*email.Email{},
}
Expand All @@ -33,24 +55,128 @@ func Threads(emails []*email.Email) []*Thread {
return ctx.threads
}

// PatchSeries is similar to Threads, but returns only the patch series submitted to the mailing lists.
func PatchSeries(emails []*email.Email) []*Series {
var ret []*Series
// Normally, all following series patches are sent in response to the first email sent.
// So there's no sense to look at deeper replies.
for _, thread := range listThreads(emails, 1) {
if thread.Type != dashapi.DiscussionPatch {
continue
}
patch, ok := parsePatchSubject(thread.Subject)
if !ok {
// It must never be happening.
panic("DiscussionPatch is set, but we fail to parse the thread subject")
tarasmadan marked this conversation as resolved.
Show resolved Hide resolved
}
total := patch.Total.ValueOr(1)
series := &Series{
Subject: patch.Title,
MessageID: thread.MessageID,
Version: patch.Version.ValueOr(1),
}
ret = append(ret, series)
if patch.Seq.IsSet() && patch.Seq.Value() > 1 {
series.Corrupted = "the first patch has seq>1"
continue
}
hasSeq := map[int]bool{}
for _, email := range thread.Messages {
patch, ok := parsePatchSubject(email.Subject)
if !ok {
continue
}
seq := patch.Seq.ValueOr(1)
if seq == 0 {
// The cover email is not of interest.
continue
}
if hasSeq[seq] {
// It's weird if that really happens, but let's skip for now.
continue
}
hasSeq[seq] = true
series.Patches = append(series.Patches, Patch{
Seq: seq,
Email: email,
})
}
if len(hasSeq) != total {
series.Corrupted = fmt.Sprintf("the subject mentions %d patches, %d are found",
total, len(hasSeq))
continue
}
if len(series.Patches) == 0 {
series.Corrupted = "0 patches"
continue
}
sort.Slice(series.Patches, func(i, j int) bool {
return series.Patches[i].Seq < series.Patches[j].Seq
})
}
return ret
}

// DiscussionType extracts the specific discussion type from an email.
func DiscussionType(msg *email.Email) dashapi.DiscussionType {
discType := dashapi.DiscussionMention
if msg.OwnEmail {
discType = dashapi.DiscussionReport
}
// This is very crude, but should work for now.
if patchSubjectRe.MatchString(strings.ToLower(msg.Subject)) {
if _, ok := parsePatchSubject(msg.Subject); ok {
discType = dashapi.DiscussionPatch
} else if strings.Contains(msg.Subject, "Monthly") {
discType = dashapi.DiscussionReminder
}
return discType
}

// PatchSubject is the information that parsePatchSubject extracts from the
// subject line of a patch email.
type PatchSubject struct {
	// Title is the subject text that follows the bracketed "[... PATCH ...]" prefix.
	Title string
	// Tags are the remaining words of the prefix, sorted.
	// Sometimes there's e.g. "net" or "net-next" in the subject.
	Tags []string
	// Version is the series revision, taken from a "vN" word in the prefix.
	Version Optional[int]
	// Seq and Total are the two numbers of the "Seq/Total" part.
	Seq   Optional[int]
	Total Optional[int]
}

// patchSubjectRe matches subjects of the form "[<tags> PATCH <tags> <seq>/<total>] <title>",
// where everything but "PATCH" and the title is optional. The match is case-insensitive.
// Capture groups: 1 - words before PATCH, 2 - words after PATCH, 3 - seq (leading
// zeros stripped), 4 - total, 5 - title.
// nolint: lll
var patchSubjectRe = regexp.MustCompile(`(?mi)^\[(?:([\w\s-]+)\s)?PATCH(?:\s([\w\s-]+))??(?:\s0*(\d+)\/(\d+))?\]\s*(.+)`)

// parsePatchSubject extracts the patch-related information from an email subject.
// The second return value is false if the subject does not look like a patch subject,
// in which case the first one is the zero PatchSubject.
func parsePatchSubject(subject string) (PatchSubject, bool) {
	var ret PatchSubject
	groups := patchSubjectRe.FindStringSubmatch(subject)
	if len(groups) == 0 {
		return ret, false
	}
	words := append(strings.Fields(groups[1]), strings.Fields(groups[2])...)
	for _, word := range words {
		// A "vN" word is the series version. The subject is matched case-insensitively,
		// so "V2" must be accepted as well as "v2".
		if len(word) > 1 && (word[0] == 'v' || word[0] == 'V') {
			if val, err := strconv.Atoi(word[1:]); err == nil {
				ret.Version.Set(val)
				continue
			}
		}
		// Anything else (including v-words with a non-numeric tail) is a plain tag.
		ret.Tags = append(ret.Tags, word)
	}
	sort.Strings(ret.Tags)
	// Atoi may only fail here on overflow; the field is then left unset.
	if groups[3] != "" {
		if val, err := strconv.Atoi(groups[3]); err == nil {
			ret.Seq.Set(val)
		}
	}
	if groups[4] != "" {
		if val, err := strconv.Atoi(groups[4]); err == nil {
			ret.Total.Set(val)
		}
	}
	ret.Title = groups[5]
	return ret, true
}

type parseCtx struct {
maxDepth int
threads []*Thread
messages map[string]*email.Email
next map[*email.Email][]*email.Email
Expand All @@ -73,7 +199,7 @@ func (c *parseCtx) process() {
}
// Iterate starting from these tree nodes.
for _, node := range nodes {
c.visit(node, nil)
c.visit(node, nil, 0)
}
// Collect BugIDs.
for _, thread := range c.threads {
Expand All @@ -92,7 +218,7 @@ func (c *parseCtx) process() {
}
}

func (c *parseCtx) visit(msg *email.Email, thread *Thread) {
func (c *parseCtx) visit(msg *email.Email, thread *Thread, depth int) {
var oldInfo *email.OldThreadInfo
if thread != nil {
oldInfo = &email.OldThreadInfo{
Expand All @@ -114,7 +240,38 @@ func (c *parseCtx) visit(msg *email.Email, thread *Thread) {
}
c.threads = append(c.threads, thread)
}
for _, nextMsg := range c.next[msg] {
c.visit(nextMsg, thread)
if c.maxDepth == 0 || depth < c.maxDepth {
for _, nextMsg := range c.next[msg] {
c.visit(nextMsg, thread, depth+1)
}
}
}

// Optional holds a value of type T together with a flag telling whether the
// value was ever assigned. The zero Optional is unset.
type Optional[T any] struct {
	val T
	set bool
}

// value returns an Optional that is already set to val.
func value[T any](val T) Optional[T] {
	var opt Optional[T]
	opt.Set(val)
	return opt
}

// IsSet reports whether a value has been stored.
func (o Optional[T]) IsSet() bool { return o.set }

// Value returns the stored value; for an unset Optional that is the zero value of T.
func (o Optional[T]) Value() T { return o.val }

// ValueOr returns the stored value, or def if nothing has been stored.
func (o Optional[T]) ValueOr(def T) T {
	if !o.set {
		return def
	}
	return o.val
}

// Set stores val and marks the Optional as set.
func (o *Optional[T]) Set(val T) {
	*o = Optional[T]{val: val, set: true}
}
Loading
Loading