@@ -51,13 +51,28 @@ type GetDocumentsApiResponse struct {
51
51
} `json:"results"`
52
52
}
53
53
54
// Document is a stripped-down version of the document object from paperless-ngx.
// It is the response payload for the /documents endpoint and part of the
// request payload for the /generate-suggestions endpoint.
//
// Suggested values are not stored on Document; they are carried by
// DocumentSuggestion alongside a copy of the original Document.
type Document struct {
	ID      int      `json:"id"`
	Title   string   `json:"title"`
	Content string   `json:"content"`
	Tags    []string `json:"tags"` // tag names, not tag IDs
}
62
+
63
+ // GenerateSuggestionsRequest is the request payload for generating suggestions for /generate-suggestions endpoint
64
+ type GenerateSuggestionsRequest struct {
65
+ Documents []Document `json:"documents"`
66
+ GenerateTitles bool `json:"generate_titles,omitempty"`
67
+ GenerateTags bool `json:"generate_tags,omitempty"`
68
+ }
69
+
70
+ // DocumentSuggestion is the response payload for /generate-suggestions endpoint and the request payload for /update-documents endpoint (as an array)
71
+ type DocumentSuggestion struct {
72
+ ID int `json:"id"`
73
+ OriginalDocument Document `json:"original_document"`
74
+ SuggestedTitle string `json:"suggested_title,omitempty"`
75
+ SuggestedTags []string `json:"suggested_tags,omitempty"`
61
76
}
62
77
63
78
var (
@@ -207,14 +222,14 @@ func documentsHandler(c *gin.Context) {
207
222
func generateSuggestionsHandler (c * gin.Context ) {
208
223
ctx := c .Request .Context ()
209
224
210
- var documents [] Document
211
- if err := c .ShouldBindJSON (& documents ); err != nil {
225
+ var suggestionRequest GenerateSuggestionsRequest
226
+ if err := c .ShouldBindJSON (& suggestionRequest ); err != nil {
212
227
c .JSON (http .StatusBadRequest , gin.H {"error" : fmt .Sprintf ("Invalid request payload: %v" , err )})
213
228
log .Printf ("Invalid request payload: %v" , err )
214
229
return
215
230
}
216
231
217
- results , err := processDocuments (ctx , documents )
232
+ results , err := generateDocumentSuggestions (ctx , suggestionRequest )
218
233
if err != nil {
219
234
c .JSON (http .StatusInternalServerError , gin.H {"error" : fmt .Sprintf ("Error processing documents: %v" , err )})
220
235
log .Printf ("Error processing documents: %v" , err )
@@ -227,7 +242,7 @@ func generateSuggestionsHandler(c *gin.Context) {
227
242
// updateDocumentsHandler updates documents with new titles
228
243
func updateDocumentsHandler (c * gin.Context ) {
229
244
ctx := c .Request .Context ()
230
- var documents []Document
245
+ var documents []DocumentSuggestion
231
246
if err := c .ShouldBindJSON (& documents ); err != nil {
232
247
c .JSON (http .StatusBadRequest , gin.H {"error" : fmt .Sprintf ("Invalid request payload: %v" , err )})
233
248
log .Printf ("Invalid request payload: %v" , err )
@@ -244,50 +259,6 @@ func updateDocumentsHandler(c *gin.Context) {
244
259
c .Status (http .StatusOK )
245
260
}
246
261
247
- func getIDMappingForTags (ctx context.Context , baseURL , apiToken string , tagsToFilter []string ) (map [string ]int , error ) {
248
- url := fmt .Sprintf ("%s/api/tags/" , baseURL )
249
- req , err := http .NewRequestWithContext (ctx , "GET" , url , nil )
250
- if err != nil {
251
- return nil , err
252
- }
253
- req .Header .Set ("Authorization" , fmt .Sprintf ("Token %s" , apiToken ))
254
-
255
- client := & http.Client {}
256
- resp , err := client .Do (req )
257
- if err != nil {
258
- return nil , err
259
- }
260
- defer resp .Body .Close ()
261
-
262
- if resp .StatusCode != http .StatusOK {
263
- bodyBytes , _ := io .ReadAll (resp .Body )
264
- return nil , fmt .Errorf ("Error fetching tags: %d, %s" , resp .StatusCode , string (bodyBytes ))
265
- }
266
-
267
- var tagsResponse struct {
268
- Results []struct {
269
- ID int `json:"id"`
270
- Name string `json:"name"`
271
- } `json:"results"`
272
- }
273
-
274
- err = json .NewDecoder (resp .Body ).Decode (& tagsResponse )
275
- if err != nil {
276
- return nil , err
277
- }
278
-
279
- tagIDMapping := make (map [string ]int )
280
- for _ , tag := range tagsResponse .Results {
281
- for _ , filterTag := range tagsToFilter {
282
- if tag .Name == filterTag {
283
- tagIDMapping [tag .Name ] = tag .ID
284
- }
285
- }
286
- }
287
-
288
- return tagIDMapping , nil
289
- }
290
-
291
262
func getDocumentsByTags (ctx context.Context , baseURL , apiToken string , tags []string ) ([]Document , error ) {
292
263
tagQueries := make ([]string , len (tags ))
293
264
for i , tag := range tags {
@@ -348,7 +319,7 @@ func getDocumentsByTags(ctx context.Context, baseURL, apiToken string, tags []st
348
319
return documents , nil
349
320
}
350
321
351
- func processDocuments (ctx context.Context , documents [] Document ) ([]Document , error ) {
322
+ func generateDocumentSuggestions (ctx context.Context , suggestionRequest GenerateSuggestionsRequest ) ([]DocumentSuggestion , error ) {
352
323
llm , err := createLLM ()
353
324
if err != nil {
354
325
return nil , fmt .Errorf ("failed to create LLM client: %v" , err )
@@ -369,6 +340,9 @@ func processDocuments(ctx context.Context, documents []Document) ([]Document, er
369
340
availableTagNames = append (availableTagNames , tagName )
370
341
}
371
342
343
+ documents := suggestionRequest .Documents
344
+ documentSuggestions := []DocumentSuggestion {}
345
+
372
346
var wg sync.WaitGroup
373
347
var mu sync.Mutex
374
348
errors := make ([]error , 0 )
@@ -385,27 +359,50 @@ func processDocuments(ctx context.Context, documents []Document) ([]Document, er
385
359
content = content [:5000 ]
386
360
}
387
361
388
- suggestedTitle , err := getSuggestedTitle (ctx , llm , content )
389
- if err != nil {
390
- mu .Lock ()
391
- errors = append (errors , fmt .Errorf ("Document %d: %v" , documentID , err ))
392
- mu .Unlock ()
393
- log .Printf ("Error processing document %d: %v" , documentID , err )
394
- return
362
+ var suggestedTitle string
363
+ var suggestedTags []string
364
+
365
+ if suggestionRequest .GenerateTitles {
366
+ suggestedTitle , err = getSuggestedTitle (ctx , llm , content )
367
+ if err != nil {
368
+ mu .Lock ()
369
+ errors = append (errors , fmt .Errorf ("Document %d: %v" , documentID , err ))
370
+ mu .Unlock ()
371
+ log .Printf ("Error processing document %d: %v" , documentID , err )
372
+ return
373
+ }
395
374
}
396
375
397
- suggestedTags , err := getSuggestedTags (ctx , llm , content , suggestedTitle , availableTagNames )
398
- if err != nil {
399
- mu .Lock ()
400
- errors = append (errors , fmt .Errorf ("Document %d: %v" , documentID , err ))
401
- mu .Unlock ()
402
- log .Printf ("Error generating tags for document %d: %v" , documentID , err )
403
- return
376
+ if suggestionRequest .GenerateTags {
377
+ suggestedTags , err = getSuggestedTags (ctx , llm , content , suggestedTitle , availableTagNames )
378
+ if err != nil {
379
+ mu .Lock ()
380
+ errors = append (errors , fmt .Errorf ("Document %d: %v" , documentID , err ))
381
+ mu .Unlock ()
382
+ log .Printf ("Error generating tags for document %d: %v" , documentID , err )
383
+ return
384
+ }
404
385
}
405
386
406
387
mu .Lock ()
407
- doc .SuggestedTitle = suggestedTitle
408
- doc .SuggestedTags = suggestedTags
388
+ suggestion := DocumentSuggestion {
389
+ ID : documentID ,
390
+ OriginalDocument : * doc ,
391
+ }
392
+ // Titles
393
+ if suggestionRequest .GenerateTitles {
394
+ suggestion .SuggestedTitle = suggestedTitle
395
+ } else {
396
+ suggestion .SuggestedTitle = doc .Title
397
+ }
398
+
399
+ // Tags
400
+ if suggestionRequest .GenerateTags {
401
+ suggestion .SuggestedTags = suggestedTags
402
+ } else {
403
+ suggestion .SuggestedTags = removeTagFromList (doc .Tags , tagToFilter )
404
+ }
405
+ documentSuggestions = append (documentSuggestions , suggestion )
409
406
mu .Unlock ()
410
407
log .Printf ("Document %d processed successfully." , documentID )
411
408
}(& documents [i ])
@@ -417,7 +414,17 @@ func processDocuments(ctx context.Context, documents []Document) ([]Document, er
417
414
return nil , errors [0 ]
418
415
}
419
416
420
- return documents , nil
417
+ return documentSuggestions , nil
418
+ }
419
+
420
// removeTagFromList returns a new slice containing every element of tags that
// is not equal to tagToRemove, preserving the original order.
//
// The input slice is never modified. The result is always non-nil: when tags
// is nil or empty, or every element matches tagToRemove, an empty slice is
// returned.
func removeTagFromList(tags []string, tagToRemove string) []string {
	// At most len(tags) elements survive, so reserve that capacity up front
	// instead of growing the slice on each append.
	filteredTags := make([]string, 0, len(tags))
	for _, tag := range tags {
		if tag != tagToRemove {
			filteredTags = append(filteredTags, tag)
		}
	}
	return filteredTags
}
422
429
423
430
func getSuggestedTags (ctx context.Context , llm llms.Model , content string , suggestedTitle string , availableTags []string ) ([]string , error ) {
@@ -507,7 +514,7 @@ Content:
507
514
return strings .TrimSpace (strings .Trim (completion .Choices [0 ].Content , "\" " )), nil
508
515
}
509
516
510
- func updateDocuments (ctx context.Context , baseURL , apiToken string , documents []Document ) error {
517
+ func updateDocuments (ctx context.Context , baseURL , apiToken string , documents []DocumentSuggestion ) error {
511
518
client := & http.Client {}
512
519
513
520
// Fetch all available tags
@@ -524,8 +531,13 @@ func updateDocuments(ctx context.Context, baseURL, apiToken string, documents []
524
531
525
532
newTags := []int {}
526
533
534
+ tags := document .SuggestedTags
535
+ if len (tags ) == 0 {
536
+ tags = document .OriginalDocument .Tags
537
+ }
538
+
527
539
// Map suggested tag names to IDs
528
- for _ , tagName := range document . SuggestedTags {
540
+ for _ , tagName := range tags {
529
541
if tagID , exists := availableTags [tagName ]; exists {
530
542
// Skip the tag that we are filtering
531
543
if tagName == tagToFilter {
@@ -543,7 +555,11 @@ func updateDocuments(ctx context.Context, baseURL, apiToken string, documents []
543
555
if len (suggestedTitle ) > 128 {
544
556
suggestedTitle = suggestedTitle [:128 ]
545
557
}
546
- updatedFields ["title" ] = suggestedTitle
558
+ if suggestedTitle != "" {
559
+ updatedFields ["title" ] = suggestedTitle
560
+ } else {
561
+ log .Printf ("No valid title found for document %d, skipping." , documentID )
562
+ }
547
563
548
564
// Send the update request
549
565
url := fmt .Sprintf ("%s/api/documents/%d/" , baseURL , documentID )
0 commit comments