From 90b2ce52eda9c7de32194fb42610ca46a9a77ce7 Mon Sep 17 00:00:00 2001 From: nbollis Date: Tue, 28 Jan 2025 15:52:37 -0600 Subject: [PATCH 01/16] Add WriteDigestCountCheckBox to SearchTaskWindow and Search Task Parameters --- MetaMorpheus/GUI/TaskWindows/SearchTaskWindow.xaml | 8 ++++++++ MetaMorpheus/GUI/TaskWindows/SearchTaskWindow.xaml.cs | 2 ++ MetaMorpheus/TaskLayer/SearchTask/SearchParameters.cs | 3 ++- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/MetaMorpheus/GUI/TaskWindows/SearchTaskWindow.xaml b/MetaMorpheus/GUI/TaskWindows/SearchTaskWindow.xaml index 3b0914ecf..ed1a57e7a 100644 --- a/MetaMorpheus/GUI/TaskWindows/SearchTaskWindow.xaml +++ b/MetaMorpheus/GUI/TaskWindows/SearchTaskWindow.xaml @@ -1171,6 +1171,14 @@ + + + + Checking this box will append the modification motif onto modification names in the .mzID output, e.g. "Carbamidomethylation on C" + + + diff --git a/MetaMorpheus/GUI/TaskWindows/SearchTaskWindow.xaml.cs b/MetaMorpheus/GUI/TaskWindows/SearchTaskWindow.xaml.cs index 124ceebd5..c5d0c86b4 100644 --- a/MetaMorpheus/GUI/TaskWindows/SearchTaskWindow.xaml.cs +++ b/MetaMorpheus/GUI/TaskWindows/SearchTaskWindow.xaml.cs @@ -332,6 +332,7 @@ private void UpdateFieldsFromTask(SearchTask task) OutputFileNameTextBox.Text = task.CommonParameters.TaskDescriptor; CkbMzId.IsChecked = task.SearchParameters.WriteMzId; + WriteDigestCountCheckBox.IsChecked = task.SearchParameters.WriteDigestionProductCountFile; WriteHighQPsmsCheckBox.IsChecked = task.SearchParameters.WriteHighQValuePsms; WriteDecoyCheckBox.IsChecked = task.SearchParameters.WriteDecoys; WriteContaminantCheckBox.IsChecked = task.SearchParameters.WriteContaminants; @@ -650,6 +651,7 @@ private void SaveButton_Click(object sender, RoutedEventArgs e) TheTask.SearchParameters.UpdateSpectralLibrary = UpdateSpectralLibraryCheckBox.IsChecked.Value; TheTask.SearchParameters.CompressIndividualFiles = CompressIndividualResultsCheckBox.IsChecked.Value; 
TheTask.SearchParameters.IncludeModMotifInMzid = IncludeMotifInModNamesCheckBox.IsChecked.Value; + TheTask.SearchParameters.WriteDigestionProductCountFile = WriteDigestCountCheckBox.IsChecked.Value; if (RemoveContaminantRadioBox.IsChecked.Value) { diff --git a/MetaMorpheus/TaskLayer/SearchTask/SearchParameters.cs b/MetaMorpheus/TaskLayer/SearchTask/SearchParameters.cs index 080ee2c6d..d0c4b4312 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/SearchParameters.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/SearchParameters.cs @@ -2,7 +2,6 @@ using UsefulProteomicsDatabases; using EngineLayer; using Omics.Modifications; -using Proteomics; namespace TaskLayer { @@ -32,6 +31,7 @@ public SearchParameters() WriteMzId = true; WritePepXml = false; IncludeModMotifInMzid = false; + WriteDigestionProductCountFile = false; ModsToWriteSelection = new Dictionary { @@ -103,5 +103,6 @@ public SearchParameters() public SilacLabel EndTurnoverLabel { get; set; } //used for SILAC turnover experiments public TargetContaminantAmbiguity TCAmbiguity { get; set; } public bool IncludeModMotifInMzid { get; set; } + public bool WriteDigestionProductCountFile { get; set; } } } \ No newline at end of file From f8bfbdb0325142359d1e40dfc28664df4d6ad239 Mon Sep 17 00:00:00 2001 From: nbollis Date: Tue, 28 Jan 2025 16:28:15 -0600 Subject: [PATCH 02/16] Add DictionaryExtensions class with AddOrCreate method with tests --- .../EngineLayer/Util/DictionaryExtensions.cs | 27 +++++++ .../Test/DictionaryExtensionsTests.cs | 72 +++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs create mode 100644 MetaMorpheus/Test/DictionaryExtensionsTests.cs diff --git a/MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs b/MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs new file mode 100644 index 000000000..abd48a122 --- /dev/null +++ b/MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs @@ -0,0 +1,27 @@ +using System.Collections.Generic; + 
+namespace EngineLayer; + +public static class DictionaryExtensions +{ + /// + /// Adds a value to the list associated with the specified key in the dictionary. + /// If the key does not exist, a new list is created with the value and added to the dictionary. + /// + /// The type of the keys in the dictionary. + /// The type of the values in the lists. + /// The dictionary to operate on. + /// The key whose value list to add to or create. + /// The value to add to the list associated with the specified key. + public static void AddOrCreate(this IDictionary> dictionary, TKey key, TValues value) + { + if (dictionary.TryGetValue(key, out IList values)) + { + values.Add(value); + } + else + { + dictionary.Add(key, new List { value }); + } + } +} \ No newline at end of file diff --git a/MetaMorpheus/Test/DictionaryExtensionsTests.cs b/MetaMorpheus/Test/DictionaryExtensionsTests.cs new file mode 100644 index 000000000..b391d561b --- /dev/null +++ b/MetaMorpheus/Test/DictionaryExtensionsTests.cs @@ -0,0 +1,72 @@ +using EngineLayer; +using NUnit.Framework; +using System.Collections.Generic; + +namespace Test +{ + [TestFixture] + public class DictionaryExtensionsTests + { + [Test] + public void AddOrCreate_AddsValueToExistingKey() + { + // Arrange + var dictionary = new Dictionary> + { + { "key1", new List { 1, 2 } } + }; + + // Act + dictionary.AddOrCreate("key1", 3); + + // Assert + Assert.That(dictionary["key1"], Is.EquivalentTo(new List { 1, 2, 3 })); + } + + [Test] + public void AddOrCreate_CreatesNewKeyWithList() + { + // Arrange + var dictionary = new Dictionary>(); + + // Act + dictionary.AddOrCreate("key1", 1); + + // Assert + Assert.That(dictionary.ContainsKey("key1")); + Assert.That(dictionary["key1"], Is.EquivalentTo(new List { 1 })); + } + + [Test] + public void AddOrCreate_AddsMultipleValuesToNewKey() + { + // Arrange + var dictionary = new Dictionary>(); + + // Act + dictionary.AddOrCreate("key1", 1); + dictionary.AddOrCreate("key1", 2); + 
dictionary.AddOrCreate("key1", 3); + + // Assert + Assert.That(dictionary["key1"], Is.EquivalentTo(new List { 1, 2, 3 })); + } + + [Test] + public void AddOrCreate_AddsMultipleValuesToExistingKey() + { + // Arrange + var dictionary = new Dictionary> + { + { "key1", new List { 1 } } + }; + + // Act + dictionary.AddOrCreate("key1", 2); + dictionary.AddOrCreate("key1", 3); + + // Assert + Assert.That(dictionary["key1"], Is.EquivalentTo(new List { 1, 2, 3 })); + } + } +} From 83d956dfe1aef263bd94d7ac4b6e0693ed0ebabe Mon Sep 17 00:00:00 2001 From: nbollis Date: Tue, 28 Jan 2025 16:28:53 -0600 Subject: [PATCH 03/16] moved analyte type to util folder --- MetaMorpheus/EngineLayer/{ => Util}/AnalyteType.cs | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename MetaMorpheus/EngineLayer/{ => Util}/AnalyteType.cs (100%) diff --git a/MetaMorpheus/EngineLayer/AnalyteType.cs b/MetaMorpheus/EngineLayer/Util/AnalyteType.cs similarity index 100% rename from MetaMorpheus/EngineLayer/AnalyteType.cs rename to MetaMorpheus/EngineLayer/Util/AnalyteType.cs From 2cc25f678fca6ef48c5033d99ec9d75c4383f9d4 Mon Sep 17 00:00:00 2001 From: nbollis Date: Tue, 28 Jan 2025 16:51:22 -0600 Subject: [PATCH 04/16] Add Increment method to DictionaryExtensions Added a generic Increment method to DictionaryExtensions.cs that increments the value of a specified key or initializes it to one if the key does not exist. Included XML documentation for the method. Updated DictionaryExtensionsTests.cs with unit tests covering various scenarios for the Increment method. 
--- .../EngineLayer/Util/DictionaryExtensions.cs | 27 +++++++- .../Test/DictionaryExtensionsTests.cs | 63 +++++++++++++++++++ 2 files changed, 88 insertions(+), 2 deletions(-) diff --git a/MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs b/MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs index abd48a122..ee055858a 100644 --- a/MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs +++ b/MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs @@ -1,4 +1,6 @@ -using System.Collections.Generic; +using Nett; +using System.Collections.Generic; +using System.Numerics; namespace EngineLayer; @@ -24,4 +26,25 @@ public static void AddOrCreate(this IDictionary { value }); } } -} \ No newline at end of file + + /// + /// Increments the value associated with the specified key in the dictionary. + /// If the key does not exist, a new entry is created with the value set to one. + /// + /// The type of the keys in the dictionary. + /// The type of the values in the dictionary, which must implement . + /// The dictionary to operate on. + /// The key whose value to increment or create. 
+ public static void Increment(this IDictionary dictionary, TKey key) + where TValue : INumber + { + if (dictionary.TryGetValue(key, out TValue value)) + { + dictionary[key] = value + TValue.One; + } + else + { + dictionary.Add(key, TValue.One); + } + } +} diff --git a/MetaMorpheus/Test/DictionaryExtensionsTests.cs b/MetaMorpheus/Test/DictionaryExtensionsTests.cs index b391d561b..b44eea457 100644 --- a/MetaMorpheus/Test/DictionaryExtensionsTests.cs +++ b/MetaMorpheus/Test/DictionaryExtensionsTests.cs @@ -68,5 +68,68 @@ public void AddOrCreate_AddsMultipleValuesToExistingKey() // Assert Assert.That(dictionary["key1"], Is.EquivalentTo(new List { 1, 2, 3 })); } + + [Test] + public void Increment_IncrementsExistingKey() + { + // Arrange + var dictionary = new Dictionary + { + { "key1", 1 } + }; + + // Act + dictionary.Increment("key1"); + + // Assert + Assert.That(dictionary["key1"], Is.EqualTo(2)); + } + + [Test] + public void Increment_AddsNewKeyWithInitialValue() + { + // Arrange + var dictionary = new Dictionary(); + + // Act + dictionary.Increment("key1"); + + // Assert + Assert.That(dictionary.ContainsKey("key1")); + Assert.That(dictionary["key1"], Is.EqualTo(1)); + } + + [Test] + public void Increment_IncrementsMultipleTimes() + { + // Arrange + var dictionary = new Dictionary + { + { "key1", 1 } + }; + + // Act + dictionary.Increment("key1"); + dictionary.Increment("key1"); + dictionary.Increment("key1"); + + // Assert + Assert.That(dictionary["key1"], Is.EqualTo(4)); + } + + [Test] + public void Increment_AddsAndIncrementsNewKey() + { + // Arrange + var dictionary = new Dictionary(); + + // Act + dictionary.Increment("key1"); + dictionary.Increment("key1"); + + // Assert + Assert.That(dictionary.ContainsKey("key1")); + Assert.That(dictionary["key1"], Is.EqualTo(2)); + } } } From ccdf5c961dea5f201db42e1c4cfcdc8b244fcbe7 Mon Sep 17 00:00:00 2001 From: nbollis Date: Tue, 28 Jan 2025 16:55:26 -0600 Subject: [PATCH 05/16] Add feature to track digestion product 
counts per protein to ClassicSearchEngine --- .../EngineLayer/ClassicSearch/ClassicSearchEngine.cs | 10 ++++++++-- MetaMorpheus/TaskLayer/SearchTask/SearchTask.cs | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/MetaMorpheus/EngineLayer/ClassicSearch/ClassicSearchEngine.cs b/MetaMorpheus/EngineLayer/ClassicSearch/ClassicSearchEngine.cs index 6c0d42f03..2bc59ae89 100644 --- a/MetaMorpheus/EngineLayer/ClassicSearch/ClassicSearchEngine.cs +++ b/MetaMorpheus/EngineLayer/ClassicSearch/ClassicSearchEngine.cs @@ -7,7 +7,6 @@ using System.Collections.Generic; using System.Linq; using System.Threading.Tasks; -using System.Threading; using Omics.Modifications; using System.Collections.Concurrent; @@ -26,12 +25,14 @@ public class ClassicSearchEngine : MetaMorpheusEngine private readonly Ms2ScanWithSpecificMass[] ArrayOfSortedMS2Scans; private readonly double[] MyScanPrecursorMasses; private readonly bool WriteSpectralLibrary; + private readonly bool WriteDigestionCounts;// Used to track the amount of digestion products from each protein when the option is enabled. 
+ private readonly ConcurrentDictionary DigestionCountDictionary; private readonly object[] Locks; public ClassicSearchEngine(SpectralMatch[] globalPsms, Ms2ScanWithSpecificMass[] arrayOfSortedMS2Scans, List variableModifications, List fixedModifications, List silacLabels, SilacLabel startLabel, SilacLabel endLabel, List proteinList, MassDiffAcceptor searchMode, CommonParameters commonParameters, List<(string FileName, CommonParameters Parameters)> fileSpecificParameters, - SpectralLibrary spectralLibrary, List nestedIds, bool writeSpectralLibrary) + SpectralLibrary spectralLibrary, List nestedIds, bool writeSpectralLibrary, bool writeDigestionCounts = false) : base(commonParameters, fileSpecificParameters, nestedIds) { PeptideSpectralMatches = globalPsms; @@ -48,6 +49,8 @@ public ClassicSearchEngine(SpectralMatch[] globalPsms, Ms2ScanWithSpecificMass[] SearchMode = searchMode; SpectralLibrary = spectralLibrary; WriteSpectralLibrary = writeSpectralLibrary; + WriteDigestionCounts = writeDigestionCounts; + DigestionCountDictionary = new(); // Create one lock for each PSM to ensure thread safety Locks = new object[PeptideSpectralMatches.Length]; @@ -108,6 +111,9 @@ protected override MetaMorpheusEngineResults RunSpecific() // digest each protein into peptides and search for each peptide in all spectra within precursor mass tolerance foreach (PeptideWithSetModifications peptide in Proteins[i].Digest(CommonParameters.DigestionParams, FixedModifications, VariableModifications, SilacLabels, TurnoverLabels)) { + if (WriteDigestionCounts) + DigestionCountDictionary.Increment(peptide.Parent.Accession); + PeptideWithSetModifications reversedOnTheFlyDecoy = null; if (SpectralLibrary != null) diff --git a/MetaMorpheus/TaskLayer/SearchTask/SearchTask.cs b/MetaMorpheus/TaskLayer/SearchTask/SearchTask.cs index 6329d27a3..3aa627041 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/SearchTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/SearchTask.cs @@ -374,7 +374,7 @@ protected 
override MyTaskResults RunSpecific(string OutputFolder, List Date: Tue, 28 Jan 2025 16:59:35 -0600 Subject: [PATCH 06/16] Only write tsv file for classic search --- MetaMorpheus/GUI/TaskWindows/SearchTaskWindow.xaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MetaMorpheus/GUI/TaskWindows/SearchTaskWindow.xaml b/MetaMorpheus/GUI/TaskWindows/SearchTaskWindow.xaml index ed1a57e7a..aa5ef525e 100644 --- a/MetaMorpheus/GUI/TaskWindows/SearchTaskWindow.xaml +++ b/MetaMorpheus/GUI/TaskWindows/SearchTaskWindow.xaml @@ -1172,10 +1172,10 @@ + Content="Write digestion product count histogram" IsEnabled="{Binding IsChecked, ElementName=ClassicSearchRadioButton}"> - Checking this box will append the modification motif onto modification names in the .mzID output, e.g. "Carbamidomethylation on C" + Checking this box will create an additional output file with a histogram of the number of digestion products per protein. From 3657dc60b23a5a5e07951ecc8741aa6d66f1a870 Mon Sep 17 00:00:00 2001 From: nbollis Date: Tue, 28 Jan 2025 17:43:59 -0600 Subject: [PATCH 07/16] Add IsNullOrEmpty method to DictionaryExtensions Added IsNullOrEmpty method to DictionaryExtensions.cs to check if a dictionary is null or empty. Included XML documentation for the method. Added unit tests in DictionaryExtensionsTests.cs to verify the method's behavior for null, empty, and non-empty dictionaries. 
--- .../EngineLayer/Util/DictionaryExtensions.cs | 14 +++++++ .../Test/DictionaryExtensionsTests.cs | 41 +++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs b/MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs index ee055858a..9b92a7ff8 100644 --- a/MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs +++ b/MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs @@ -47,4 +47,18 @@ public static void Increment(this IDictionary dictio dictionary.Add(key, TValue.One); } } + + /// + /// Determines whether the dictionary is null or has no elements. + /// + /// The type of the keys in the dictionary. + /// The type of the values in the dictionary. + /// The dictionary to check. + /// + /// true if the dictionary is null or has no elements; otherwise, false. + /// + public static bool IsNullOrEmpty(this IDictionary dictionary) + { + return dictionary == null || dictionary.Count == 0; + } } diff --git a/MetaMorpheus/Test/DictionaryExtensionsTests.cs b/MetaMorpheus/Test/DictionaryExtensionsTests.cs index b44eea457..bb315f276 100644 --- a/MetaMorpheus/Test/DictionaryExtensionsTests.cs +++ b/MetaMorpheus/Test/DictionaryExtensionsTests.cs @@ -131,5 +131,46 @@ public void Increment_AddsAndIncrementsNewKey() Assert.That(dictionary.ContainsKey("key1")); Assert.That(dictionary["key1"], Is.EqualTo(2)); } + [Test] + public void IsNullOrEmpty_ReturnsTrueForNullDictionary() + { + // Arrange + Dictionary dictionary = null; + + // Act + var result = dictionary.IsNullOrEmpty(); + + // Assert + Assert.That(result, Is.True); + } + + [Test] + public void IsNullOrEmpty_ReturnsTrueForEmptyDictionary() + { + // Arrange + var dictionary = new Dictionary(); + + // Act + var result = dictionary.IsNullOrEmpty(); + + // Assert + Assert.That(result, Is.True); + } + + [Test] + public void IsNullOrEmpty_ReturnsFalseForNonEmptyDictionary() + { + // Arrange + var dictionary = new Dictionary + { + { "key1", 1 } + }; + + // Act + var 
result = dictionary.IsNullOrEmpty(); + + // Assert + Assert.That(result, Is.False); + } } } From 7c33067069e2a0e3e000782d1fd6064d089e619f Mon Sep 17 00:00:00 2001 From: nbollis Date: Tue, 28 Jan 2025 19:18:29 -0600 Subject: [PATCH 08/16] Add feature to track and write digestion product counts Introduced functionality to track and write digestion product counts for proteins during a search task. Key changes include: - Made `DigestionCountDictionary` a public readonly field in `ClassicSearchEngine.cs` and adjusted the constructor accordingly. - Added an internal property `DigestionCountDictionary` in `PostSearchAnalysisTask.cs` and implemented methods to write counts to .tsv files. - Modified `SearchTask.cs` to initialize and pass `digestionCountDictionary` to `PostSearchAnalysisTask`. - Added tests in `PostSearchAnalysisTaskTests.cs` to verify the correct writing of digestion counts and histograms. --- .../ClassicSearch/ClassicSearchEngine.cs | 4 +- .../SearchTask/PostSearchAnalysisTask.cs | 64 ++++- .../TaskLayer/SearchTask/SearchTask.cs | 13 +- .../Test/PostSearchAnalysisTaskTests.cs | 218 ++++++++++++++++++ 4 files changed, 294 insertions(+), 5 deletions(-) diff --git a/MetaMorpheus/EngineLayer/ClassicSearch/ClassicSearchEngine.cs b/MetaMorpheus/EngineLayer/ClassicSearch/ClassicSearchEngine.cs index 2bc59ae89..733b234f0 100644 --- a/MetaMorpheus/EngineLayer/ClassicSearch/ClassicSearchEngine.cs +++ b/MetaMorpheus/EngineLayer/ClassicSearch/ClassicSearchEngine.cs @@ -25,9 +25,9 @@ public class ClassicSearchEngine : MetaMorpheusEngine private readonly Ms2ScanWithSpecificMass[] ArrayOfSortedMS2Scans; private readonly double[] MyScanPrecursorMasses; private readonly bool WriteSpectralLibrary; - private readonly bool WriteDigestionCounts;// Used to track the amount of digestion products from each protein when the option is enabled. 
- private readonly ConcurrentDictionary DigestionCountDictionary; + private readonly bool WriteDigestionCounts; private readonly object[] Locks; + public readonly ConcurrentDictionary DigestionCountDictionary; // Used to track the amount of digestion products from each protein when the option is enabled. public ClassicSearchEngine(SpectralMatch[] globalPsms, Ms2ScanWithSpecificMass[] arrayOfSortedMS2Scans, List variableModifications, List fixedModifications, List silacLabels, SilacLabel startLabel, SilacLabel endLabel, diff --git a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs index bf6734e70..addbdd3bd 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs @@ -35,7 +35,10 @@ public class PostSearchAnalysisTask : MetaMorpheusTask /// Used for storage of results for writing to Results.tsv. It is explained in the method ConstructResultsDictionary() /// private Dictionary<(string,string),string> ResultsDictionary { get; set; } - + /// + /// Used for storage of results for writing digestion product counts to a .tsv. + /// + internal IDictionary? DigestionCountDictionary { get; set; } public PostSearchAnalysisTask() : base(MyTask.Search) { @@ -110,6 +113,12 @@ public MyTaskResults Run() UpdateSpectralLibrary(); } + if (DigestionCountDictionary != null && DigestionCountDictionary.Any()) + { + WriteDigestionCountByProtein(); + WriteDigestionCountHistogram(); + } + WriteFlashLFQResults(); if (Parameters.ProteinList.Any((p => p.AppliedSequenceVariations.Count > 0))) @@ -1939,5 +1948,58 @@ private void WritePeakQuantificationResultsToTsv(FlashLfqResults flashLFQResults FinishedWritingFile(peaksPath, nestedIds); } + + /// + /// Writes the digestion product counts for each protein to a .tsv file. 
+ /// + private void WriteDigestionCountByProtein() + { + if (DigestionCountDictionary.IsNullOrEmpty()) + return; + + var nestedIds = new List { Parameters.SearchTaskId }; + var countByProteinPath = Path.Combine(Parameters.OutputFolder, $"DigestionCountsBy{GlobalVariables.AnalyteType.GetBioPolymerLabel()}s.tsv"); + + // write all values to file + using (var writer = new StreamWriter(countByProteinPath)) + { + writer.WriteLine("Protein Accession\tDigestion Products"); + foreach (var proteinEntry in DigestionCountDictionary!) + { + writer.WriteLine($"{proteinEntry.Key}\t{proteinEntry.Value}"); + } + } + FinishedWritingFile(countByProteinPath, nestedIds); + } + + /// + /// Writes a histogram of digestion product counts to a .tsv file. + /// + private void WriteDigestionCountHistogram() + { + if (DigestionCountDictionary.IsNullOrEmpty()) + return; + + var nestedIds = new List { Parameters.SearchTaskId }; + var countHistogramPath = Path.Combine(Parameters.OutputFolder, $"DigestionCountHistogram.tsv"); + + // Create Histogram + var countDictionary = new Dictionary(CommonParameters.DigestionParams.MaxModificationIsoforms); + foreach (var proteinEntry in DigestionCountDictionary!) 
+ { + countDictionary.Increment(proteinEntry.Value); + } + + // Write Histogram + using (StreamWriter writer = new(countHistogramPath)) + { + writer.WriteLine($"Digestion Products\tCount of {GlobalVariables.AnalyteType.GetBioPolymerLabel()}s"); + foreach (var count in countDictionary.OrderBy(p => p.Key)) + { + writer.WriteLine($"{count.Key}\t{count.Value}"); + } + } + FinishedWritingFile(countHistogramPath, nestedIds); + } } } \ No newline at end of file diff --git a/MetaMorpheus/TaskLayer/SearchTask/SearchTask.cs b/MetaMorpheus/TaskLayer/SearchTask/SearchTask.cs index 3aa627041..07441761d 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/SearchTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/SearchTask.cs @@ -203,6 +203,7 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List { taskId, "Individual Spectra Files" }); Dictionary numMs2SpectraPerFile = new Dictionary(); + IDictionary digestionCountDictionary = null; for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++) { if (GlobalVariables.StopLoops) { break; } @@ -375,7 +376,14 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List + { + { "Protein1", 5 }, + { "Protein2", 10 } + }; + task.GetType().GetProperty("DigestionCountDictionary", BindingFlags.NonPublic | BindingFlags.Instance).SetValue(task, digestionCountDictionary); + + // Act + var method = task.GetType().GetMethod("WriteDigestionCountByProtein", BindingFlags.NonPublic | BindingFlags.Instance); + method!.Invoke(task, null); + + // Assert + var expectedFilePath = Path.Combine(parameters.OutputFolder, "DigestionCountsByProteins.tsv"); + Assert.That(File.Exists(expectedFilePath), Is.True); + + var lines = File.ReadAllLines(expectedFilePath); + Assert.That(lines.Length, Is.EqualTo(3)); + Assert.That(lines[0], Is.EqualTo("Protein Accession\tDigestion Products")); + Assert.That(lines[1], Is.EqualTo("Protein1\t5")); + Assert.That(lines[2], Is.EqualTo("Protein2\t10")); + + // Cleanup + 
Directory.Delete(parameters.OutputFolder, true); + } + + [Test] + public static void WriteDigestionCountsHistogram_WritesCorrectFile() + { + // Arrange + var task = new PostSearchAnalysisTask() { CommonParameters = new() }; + var outputDirectory = Path.Combine(TestContext.CurrentContext.WorkDirectory, "DigestionHistogramTest"); + if (Directory.Exists(outputDirectory)) + Directory.Delete(outputDirectory, true); + Directory.CreateDirectory(outputDirectory); + var parameters = new PostSearchAnalysisParameters + { + OutputFolder = outputDirectory, + SearchTaskId = "TestTask" + }; + task.GetType().GetProperty("Parameters").SetValue(task, parameters); + var digestionCountDictionary = new Dictionary + { + { "Protein1", 5 }, + { "Protein2", 10 }, + { "Protein3", 5 } + }; + task.GetType().GetProperty("DigestionCountDictionary", BindingFlags.NonPublic | BindingFlags.Instance).SetValue(task, digestionCountDictionary); + + // Act + var method = task.GetType().GetMethod("WriteDigestionCountHistogram", BindingFlags.NonPublic | BindingFlags.Instance); + method.Invoke(task, null); + + // Assert + var expectedFilePath = Path.Combine(parameters.OutputFolder, "DigestionCountHistogram.tsv"); + Assert.That(File.Exists(expectedFilePath), Is.True); + var lines = File.ReadAllLines(expectedFilePath); + Assert.That(lines.Length, Is.EqualTo(3)); + Assert.That(lines[0], Is.EqualTo("Digestion Products\tCount of Proteins")); + Assert.That(lines[1], Is.EqualTo("5\t2")); + Assert.That(lines[2], Is.EqualTo("10\t1")); + + // Cleanup + Directory.Delete(parameters.OutputFolder, true); + } + + public record DigestionCountTestCase(string DbPath, int MaxIsoforms, bool UseVariableMods, string Name) + { + public override string ToString() + { + return Name; + } + }; + + public static IEnumerable GetDigestionCountTestCases() + { + // single protein, single peptide + yield return new DigestionCountTestCase("DatabaseTests//ProteaseModTest.fasta", 1, false, "SingleProteinSinglePeptide_NoMods"); + yield return 
new DigestionCountTestCase("DatabaseTests//ProteaseModTest.fasta", 1, true, "SingleProteinSinglePeptide_WithMods"); + yield return new DigestionCountTestCase("DatabaseTests//ProteaseModTest.fasta", 128, false, "SingleProteinSinglePeptide_ManyIsoforms_NoMods"); + yield return new DigestionCountTestCase("DatabaseTests//ProteaseModTest.fasta", 128, true, "SingleProteinSinglePeptide_ManyIsoforms_WithMods"); + + // single protein, two peptide + yield return new DigestionCountTestCase("indexEngineTestFasta.fasta", 1, false, "SingleProteinTwoPeptide_NoMods"); + yield return new DigestionCountTestCase("indexEngineTestFasta.fasta", 1, true, "SingleProteinTwoPeptide_WithMods"); + yield return new DigestionCountTestCase("indexEngineTestFasta.fasta", 128, false, "SingleProteinTwoPeptide_ManyIsoforms_NoMods"); + yield return new DigestionCountTestCase("indexEngineTestFasta.fasta", 128, true, "SingleProteinTwoPeptide_ManyIsoforms_WithMods"); + + // single protein, many peptides + yield return new DigestionCountTestCase("DatabaseTests//Q9UHB6.FASTA", 1, false, "SingleProteinManyPeptides_NoMods"); + yield return new DigestionCountTestCase("DatabaseTests//Q9UHB6.FASTA", 1, true, "SingleProteinManyPeptides_WithMods"); + yield return new DigestionCountTestCase("DatabaseTests//Q9UHB6.FASTA", 128, false, "SingleProteinManyPeptides_ManyIsoforms_NoMods"); + yield return new DigestionCountTestCase("DatabaseTests//Q9UHB6.FASTA", 128, true, "SingleProteinManyPeptides_ManyIsoforms_WithMods"); + + // many proteins, even more peptides + yield return new DigestionCountTestCase("TestData//DbForPrunedDb.fasta", 1, false, "ManyProteinsManyPeptides_NoMods"); + yield return new DigestionCountTestCase("TestData//DbForPrunedDb.fasta", 1, true, "ManyProteinsManyPeptides_WithMods"); + yield return new DigestionCountTestCase("TestData//DbForPrunedDb.fasta", 1024, false, "ManyProteinsManyPeptides_ManyIsoforms_NoMods"); + yield return new DigestionCountTestCase("TestData//DbForPrunedDb.fasta", 1024, true, 
"ManyProteinsManyPeptides_ManyIsoforms_WithMods"); + } + + [Test] + [TestCaseSource(nameof(GetDigestionCountTestCases))] + public static void WriteDigestionCountFiles_IsCorrectFromSearchTask(DigestionCountTestCase testCase) + { + // Arrange + string outDirectory = Path.Combine(TestContext.CurrentContext.TestDirectory, "DigestionCountTest"); + if (Directory.Exists(outDirectory)) + Directory.Delete(outDirectory, true); + + var variableMods = testCase.UseVariableMods + ? new List<(string, string)> + { + ("Common Variable", "Oxidation on M"), ("Common Biological", "Acetylation on A"), + ("Common Biological", "Acetylation on G"), ("Common Biological", "Acetylation on K"), + ("Common Biological", "Acetylation on M"), ("Common Biological", "Acetylation on P"), + ("Common Biological", "Acetylation on S"), ("Common Biological", "Acetylation on T"), + ("Common Biological", "Acetylation on X"), ("Common Biological", "Carboxylation on D"), + ("Common Biological", "Carboxylation on E"), ("Common Biological", "Carboxylation on K"), + ("Common Biological", "Crotonylation on K"), ("Common Biological", "Dimethylation on K"), + ("Common Biological", "Dimethylation on R"), ("Common Biological", "Formylation on K"), + ("Common Biological", "HexNAc on Nxs"), ("Common Biological", "HexNAc on Nxt"), + ("Common Biological", "HexNAc on S"), ("Common Biological", "HexNAc on T"), + ("Common Biological", "Hydroxylation on K"), ("Common Biological", "Hydroxylation on N"), + ("Common Biological", "Hydroxylation on P"), ("Common Biological", "Methylation on K"), + ("Common Biological", "Methylation on Q"), ("Common Biological", "Methylation on R"), + ("Common Biological", "Phosphorylation on S"), ("Common Biological", "Phosphorylation on T"), + ("Common Biological", "Phosphorylation on Y"), ("Common Biological", "Sulfonation on Y"), + ("Common Biological", "Trimethylation on K") + } + : []; + + string searchTaskId = "test"; + DigestionParams digestionParams = new 
DigestionParams(maxModificationIsoforms: testCase.MaxIsoforms, maxMissedCleavages: 0, minPeptideLength: 3); + var db = new List() { new DbForTask(Path.Combine(TestContext.CurrentContext.TestDirectory, testCase.DbPath), false) }; + var files = new List() { Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", "sliced_b6.mzML") }; + var tasks = new List<(string, MetaMorpheusTask)>{ (searchTaskId, new SearchTask + { + CommonParameters = new CommonParameters(digestionParams: digestionParams, listOfModsVariable: variableMods), + SearchParameters = new SearchParameters + { + DoParsimony = true, + SearchType = SearchType.Classic, + SearchTarget = true, + DecoyType = DecoyType.None, + WriteDigestionProductCountFile = true + }, + })}; + + // convert string modifications to Modification + object[] parameters = new object[] { "taskId", null, null, null }; + var modConversionMethod = typeof(MetaMorpheusTask).GetMethod("LoadModifications", BindingFlags.NonPublic | BindingFlags.Instance); + modConversionMethod!.Invoke(tasks.First().Item2, parameters); + List variableModifications = (List)parameters[1]; + + // Act + var runner = new EverythingRunnerEngine(tasks, files, db, outDirectory); + runner.Run(); + + // Pull Results from files and calculate from digestion + var proteins = ProteinDbLoader.LoadProteinFasta(Path.Combine(TestContext.CurrentContext.TestDirectory, testCase.DbPath), true, DecoyType.None, false, out var errors); + var digestionResults = proteins.ToDictionary(p => p.Accession, p => p.Digest(digestionParams, [], variableModifications).ToList()); + var digestionHistResults = digestionResults.GroupBy(p => p.Value.Count).ToDictionary(p => p.Key, p => p.Count()); + var byProteinLines = File.ReadAllLines(Path.Combine(outDirectory, searchTaskId, "DigestionCountsByProteins.tsv")); + var histogramLines = File.ReadAllLines(Path.Combine(outDirectory, searchTaskId, "DigestionCountHistogram.tsv")); + + // Assert + Assert.That(byProteinLines.Length, 
Is.EqualTo(proteins.Count + 1)); + for (int i = 1; i < byProteinLines.Length; i++) + { + var split = byProteinLines[i].Split('\t'); + Assert.That(split.Length, Is.EqualTo(2)); + + var writtenAccession = split[0]; + var writtenCount = int.Parse(split[1]); + + Assert.That(writtenCount, Is.EqualTo(digestionResults[writtenAccession].Count)); + } + + Assert.That(histogramLines.Length, Is.EqualTo(digestionHistResults.Count + 1)); + for (int i = 1; i < histogramLines.Length; i++) + { + var split = histogramLines[i].Split('\t'); + Assert.That(split.Length, Is.EqualTo(2)); + + var writtenDigestionCount = int.Parse(split[0]); + var writtenProteinCount = int.Parse(split[1]); + + Assert.That(writtenProteinCount, Is.EqualTo(digestionHistResults[writtenDigestionCount])); + } + + // Cleanup + Directory.Delete(outDirectory, true); + } } } \ No newline at end of file From 4429039201650fb772f93cd5c349831cfbfbc07c Mon Sep 17 00:00:00 2001 From: nbollis Date: Tue, 28 Jan 2025 19:22:32 -0600 Subject: [PATCH 09/16] Ensured writing consistency with multiple data files --- MetaMorpheus/Test/PostSearchAnalysisTaskTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MetaMorpheus/Test/PostSearchAnalysisTaskTests.cs b/MetaMorpheus/Test/PostSearchAnalysisTaskTests.cs index 6fda84801..fa10034fb 100644 --- a/MetaMorpheus/Test/PostSearchAnalysisTaskTests.cs +++ b/MetaMorpheus/Test/PostSearchAnalysisTaskTests.cs @@ -398,7 +398,7 @@ public static void WriteDigestionCountFiles_IsCorrectFromSearchTask(DigestionCou string searchTaskId = "test"; DigestionParams digestionParams = new DigestionParams(maxModificationIsoforms: testCase.MaxIsoforms, maxMissedCleavages: 0, minPeptideLength: 3); var db = new List() { new DbForTask(Path.Combine(TestContext.CurrentContext.TestDirectory, testCase.DbPath), false) }; - var files = new List() { Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", "sliced_b6.mzML") }; + var files = new List() { 
Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", "sliced_b6.mzML"), Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", "Q9UHB6_Chym_snip.mzML") }; var tasks = new List<(string, MetaMorpheusTask)>{ (searchTaskId, new SearchTask { CommonParameters = new CommonParameters(digestionParams: digestionParams, listOfModsVariable: variableMods), From ffcd71381646aef6fde3850a1b10b4c8de292901 Mon Sep 17 00:00:00 2001 From: nbollis Date: Tue, 28 Jan 2025 19:31:40 -0600 Subject: [PATCH 10/16] Cleanup and Additional Comments --- MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs | 2 +- MetaMorpheus/Test/PostSearchAnalysisTaskTests.cs | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs index addbdd3bd..763748f8f 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs @@ -113,7 +113,7 @@ public MyTaskResults Run() UpdateSpectralLibrary(); } - if (DigestionCountDictionary != null && DigestionCountDictionary.Any()) + if (DigestionCountDictionary != null && DigestionCountDictionary.Any()) // Will be null or empty if no digestion count output file is desired. 
{ WriteDigestionCountByProtein(); WriteDigestionCountHistogram(); diff --git a/MetaMorpheus/Test/PostSearchAnalysisTaskTests.cs b/MetaMorpheus/Test/PostSearchAnalysisTaskTests.cs index fa10034fb..ac7c4ffd9 100644 --- a/MetaMorpheus/Test/PostSearchAnalysisTaskTests.cs +++ b/MetaMorpheus/Test/PostSearchAnalysisTaskTests.cs @@ -6,7 +6,6 @@ using System.Reflection; using EngineLayer; using NUnit.Framework; -using Omics.Digestion; using Omics.Modifications; using Proteomics.ProteolyticDigestion; using TaskLayer; From 7c3e2cafc2f5bca214579d19076bb4f3e2df9a02 Mon Sep 17 00:00:00 2001 From: nbollis Date: Tue, 28 Jan 2025 21:25:50 -0600 Subject: [PATCH 11/16] Added more comments to tooltop --- MetaMorpheus/GUI/TaskWindows/SearchTaskWindow.xaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/MetaMorpheus/GUI/TaskWindows/SearchTaskWindow.xaml b/MetaMorpheus/GUI/TaskWindows/SearchTaskWindow.xaml index aa5ef525e..7c8bf1794 100644 --- a/MetaMorpheus/GUI/TaskWindows/SearchTaskWindow.xaml +++ b/MetaMorpheus/GUI/TaskWindows/SearchTaskWindow.xaml @@ -1176,6 +1176,10 @@ Checking this box will create an additional output file with a histogram of the number of digestion products per protein. + + The number of digestion products is limited by the MaxModsPerPeptide parameter and limited to the MaxModifiedIsoforms parameter for each primary sequence generated by each database entry + + Multiple base sequences can be generated per protein due to variable methionine and splice variants if annotated in a database From ca062ca0a46749c1ce51d69fb6c197f376711509 Mon Sep 17 00:00:00 2001 From: Nic Bollis Date: Wed, 29 Jan 2025 15:27:27 -0600 Subject: [PATCH 12/16] Ensured thread safety for extension methods with concurent dictionary. 
--- .../EngineLayer/Util/DictionaryExtensions.cs | 27 ++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs b/MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs index 9b92a7ff8..9c3d3466f 100644 --- a/MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs +++ b/MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs @@ -1,4 +1,5 @@ using Nett; +using System.Collections.Concurrent; using System.Collections.Generic; using System.Numerics; @@ -17,13 +18,20 @@ public static class DictionaryExtensions /// The value to add to the list associated with the specified key. public static void AddOrCreate(this IDictionary> dictionary, TKey key, TValues value) { - if (dictionary.TryGetValue(key, out IList values)) + if (dictionary is ConcurrentDictionary> concurrentDictionary) { - values.Add(value); + concurrentDictionary.AddOrUpdate(key, new List { value }, (k, v) => { v.Add(value); return v; }); } else { - dictionary.Add(key, new List { value }); + if (dictionary.TryGetValue(key, out IList values)) + { + values.Add(value); + } + else + { + dictionary.Add(key, new List { value }); + } } } @@ -38,13 +46,20 @@ public static void AddOrCreate(this IDictionary(this IDictionary dictionary, TKey key) where TValue : INumber { - if (dictionary.TryGetValue(key, out TValue value)) + if (dictionary is ConcurrentDictionary concurrentDictionary) { - dictionary[key] = value + TValue.One; + concurrentDictionary.AddOrUpdate(key, TValue.One, (k, v) => v + TValue.One); } else { - dictionary.Add(key, TValue.One); + if (dictionary.TryGetValue(key, out TValue value)) + { + dictionary[key] = value + TValue.One; + } + else + { + dictionary.Add(key, TValue.One); + } } } From 6d7471a5fa5f6e5224c79b39a9e1c2b0d3e6317d Mon Sep 17 00:00:00 2001 From: Nic Bollis Date: Wed, 29 Jan 2025 16:16:46 -0600 Subject: [PATCH 13/16] Added tests for thread safety --- .../EngineLayer/Util/DictionaryExtensions.cs | 54 +++++++++++++-- 
.../Test/DictionaryExtensionsTests.cs | 67 +++++++++++++++++++ 2 files changed, 114 insertions(+), 7 deletions(-) diff --git a/MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs b/MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs index 9c3d3466f..4e3a12034 100644 --- a/MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs +++ b/MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs @@ -1,4 +1,5 @@ using Nett; +using Newtonsoft.Json.Linq; using System.Collections.Concurrent; using System.Collections.Generic; using System.Numerics; @@ -7,6 +8,8 @@ namespace EngineLayer; public static class DictionaryExtensions { + private static readonly object AddOrCreateLock = new object(); + /// /// Adds a value to the list associated with the specified key in the dictionary. /// If the key does not exist, a new list is created with the value and added to the dictionary. @@ -16,21 +19,58 @@ public static class DictionaryExtensions /// The dictionary to operate on. /// The key whose value list to add to or create. /// The value to add to the list associated with the specified key. + /// + /// This is not thread-safe! + /// public static void AddOrCreate(this IDictionary> dictionary, TKey key, TValues value) { - if (dictionary is ConcurrentDictionary> concurrentDictionary) + if (dictionary.TryGetValue(key, out IList values)) { - concurrentDictionary.AddOrUpdate(key, new List { value }, (k, v) => { v.Add(value); return v; }); + values.Add(value); } else { - if (dictionary.TryGetValue(key, out IList values)) + dictionary.Add(key, new List { value }); + } + } + + /// + /// Adds a value to the list associated with the specified key in the dictionary. + /// If the key does not exist, a new list is created with the value and added to the dictionary. + /// + /// The type of the keys in the dictionary. + /// The type of the values in the lists. + /// The dictionary to operate on. + /// The key whose value list to add to or create. 
+ /// The value to add to the list associated with the specified key. + /// + /// This is thread safe for all dictionary types. + /// + public static void AddOrCreateThreadSafe(this IDictionary> dictionary, TKey key, TValues value) + { + if (dictionary is ConcurrentDictionary> concurrentDictionary) + { + concurrentDictionary.AddOrUpdate(key, new List { value }, (k, v) => { - values.Add(value); - } - else + lock (AddOrCreateLock) + { + v.Add(value); + return v; + } + }); + } + else + { + lock (AddOrCreateLock) { - dictionary.Add(key, new List { value }); + if (dictionary.TryGetValue(key, out IList values)) + { + values.Add(value); + } + else + { + dictionary.Add(key, new List { value }); + } } } } diff --git a/MetaMorpheus/Test/DictionaryExtensionsTests.cs b/MetaMorpheus/Test/DictionaryExtensionsTests.cs index bb315f276..1dca8b6d7 100644 --- a/MetaMorpheus/Test/DictionaryExtensionsTests.cs +++ b/MetaMorpheus/Test/DictionaryExtensionsTests.cs @@ -1,6 +1,8 @@ using EngineLayer; using NUnit.Framework; +using System.Collections.Concurrent; using System.Collections.Generic; +using System.Threading.Tasks; namespace Test { @@ -131,6 +133,71 @@ public void Increment_AddsAndIncrementsNewKey() Assert.That(dictionary.ContainsKey("key1")); Assert.That(dictionary["key1"], Is.EqualTo(2)); } + + [Test] + public void Increment_ThreadSafeWithConcurrentDictionary() + { + // Arrange + var dictionary = new ConcurrentDictionary(); + var tasks = new List(); + + // Act + for (int i = 0; i < 1000; i++) + { + tasks.Add(Task.Run(() => dictionary.Increment("key1"))); + } + Task.WaitAll(tasks.ToArray()); + + // Assert + Assert.That(dictionary["key1"], Is.EqualTo(1000)); + } + + [Test] + public void AddOrCreate_ThreadSafeWithConcurrentDictionary() + { + // Arrange + var dictionary = new ConcurrentDictionary>(); + var tasks = new List(); + + // Act + for (int i = 0; i < 1000; i++) + { + int value = i; + tasks.Add(Task.Run(() => dictionary.AddOrCreateThreadSafe("key1", value))); + } + 
Task.WaitAll(tasks.ToArray()); + + // Assert + Assert.That(dictionary["key1"].Count, Is.EqualTo(1000)); + for (int i = 0; i < 1000; i++) + { + Assert.That(dictionary["key1"], Contains.Item(i)); + } + } + + [Test] + public void AddOrCreate_ThreadSafeWithDictionary() + { + // Arrange + var dictionary = new Dictionary>(); + var tasks = new List(); + + // Act + for (int i = 0; i < 1000; i++) + { + int value = i; + tasks.Add(Task.Run(() => dictionary.AddOrCreateThreadSafe("key1", value))); + } + Task.WaitAll(tasks.ToArray()); + + // Assert + Assert.That(dictionary["key1"].Count, Is.EqualTo(1000)); + for (int i = 0; i < 1000; i++) + { + Assert.That(dictionary["key1"], Contains.Item(i)); + } + } + [Test] public void IsNullOrEmpty_ReturnsTrueForNullDictionary() { From 73bedd96a10f044c8abdd80ef642e518f15f4b6a Mon Sep 17 00:00:00 2001 From: nbollis Date: Thu, 30 Jan 2025 13:59:42 -0600 Subject: [PATCH 14/16] AddOrCreateThreadSafe take in lock object --- .../EngineLayer/Util/DictionaryExtensions.cs | 14 ++++++-------- MetaMorpheus/Test/DictionaryExtensionsTests.cs | 6 ++++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs b/MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs index 4e3a12034..b851f5462 100644 --- a/MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs +++ b/MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs @@ -1,6 +1,4 @@ -using Nett; -using Newtonsoft.Json.Linq; -using System.Collections.Concurrent; +using System.Collections.Concurrent; using System.Collections.Generic; using System.Numerics; @@ -8,8 +6,6 @@ namespace EngineLayer; public static class DictionaryExtensions { - private static readonly object AddOrCreateLock = new object(); - /// /// Adds a value to the list associated with the specified key in the dictionary. /// If the key does not exist, a new list is created with the value and added to the dictionary. 
@@ -43,16 +39,18 @@ public static void AddOrCreate(this IDictionaryThe dictionary to operate on. /// The key whose value list to add to or create. /// The value to add to the list associated with the specified key. + /// Object used to lock this specific dictionary /// /// This is thread safe for all dictionary types. /// - public static void AddOrCreateThreadSafe(this IDictionary> dictionary, TKey key, TValues value) + public static void AddOrCreateThreadSafe(this IDictionary> dictionary, TKey key, TValues value, object lockObject) { if (dictionary is ConcurrentDictionary> concurrentDictionary) { concurrentDictionary.AddOrUpdate(key, new List { value }, (k, v) => { - lock (AddOrCreateLock) + // must lock inside the add or update as the List.Add method is not thread safe. + lock (lockObject) { v.Add(value); return v; @@ -61,7 +59,7 @@ public static void AddOrCreateThreadSafe(this IDictionary values)) { diff --git a/MetaMorpheus/Test/DictionaryExtensionsTests.cs b/MetaMorpheus/Test/DictionaryExtensionsTests.cs index 1dca8b6d7..2a04ba99b 100644 --- a/MetaMorpheus/Test/DictionaryExtensionsTests.cs +++ b/MetaMorpheus/Test/DictionaryExtensionsTests.cs @@ -158,12 +158,13 @@ public void AddOrCreate_ThreadSafeWithConcurrentDictionary() // Arrange var dictionary = new ConcurrentDictionary>(); var tasks = new List(); + var lockObject = new object(); // Act for (int i = 0; i < 1000; i++) { int value = i; - tasks.Add(Task.Run(() => dictionary.AddOrCreateThreadSafe("key1", value))); + tasks.Add(Task.Run(() => dictionary.AddOrCreateThreadSafe("key1", value, lockObject))); } Task.WaitAll(tasks.ToArray()); @@ -181,12 +182,13 @@ public void AddOrCreate_ThreadSafeWithDictionary() // Arrange var dictionary = new Dictionary>(); var tasks = new List(); + var lockObject = new object(); // Act for (int i = 0; i < 1000; i++) { int value = i; - tasks.Add(Task.Run(() => dictionary.AddOrCreateThreadSafe("key1", value))); + tasks.Add(Task.Run(() => dictionary.AddOrCreateThreadSafe("key1", 
value, lockObject))); } Task.WaitAll(tasks.ToArray()); From 1cb8dfe212d4b41817a8154c316269cf227618d3 Mon Sep 17 00:00:00 2001 From: nbollis Date: Thu, 30 Jan 2025 15:24:54 -0600 Subject: [PATCH 15/16] Adjusted Dictionary Extensions Increment method to --- .../EngineLayer/Util/DictionaryExtensions.cs | 18 ++-- .../Test/DictionaryExtensionsTests.cs | 84 ++++++++++++++++++- 2 files changed, 94 insertions(+), 8 deletions(-) diff --git a/MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs b/MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs index b851f5462..2dc376849 100644 --- a/MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs +++ b/MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs @@ -1,4 +1,5 @@ -using System.Collections.Concurrent; +using Nett; +using System.Collections.Concurrent; using System.Collections.Generic; using System.Numerics; @@ -74,29 +75,32 @@ public static void AddOrCreateThreadSafe(this IDictionary - /// Increments the value associated with the specified key in the dictionary. + /// Increments the value associated with the specified key in the dictionary by a defined number with a default of one. /// If the key does not exist, a new entry is created with the value set to one. /// /// The type of the keys in the dictionary. /// The type of the values in the dictionary, which must implement . /// The dictionary to operate on. /// The key whose value to increment or create. - public static void Increment(this IDictionary dictionary, TKey key) - where TValue : INumber + /// The amount to increment by with a default of one + public static void Increment(this IDictionary dictionary, TKey key, TValue incrementBy = default) + where TValue : INumber { + TValue incrementValue = incrementBy.Equals(default) ? 
TValue.One : incrementBy; + if (dictionary is ConcurrentDictionary concurrentDictionary) { - concurrentDictionary.AddOrUpdate(key, TValue.One, (k, v) => v + TValue.One); + concurrentDictionary.AddOrUpdate(key, incrementValue, (k, v) => v + incrementValue); } else { if (dictionary.TryGetValue(key, out TValue value)) { - dictionary[key] = value + TValue.One; + dictionary[key] = value + incrementValue; } else { - dictionary.Add(key, TValue.One); + dictionary.Add(key, incrementValue); } } } diff --git a/MetaMorpheus/Test/DictionaryExtensionsTests.cs b/MetaMorpheus/Test/DictionaryExtensionsTests.cs index 2a04ba99b..563f2c2b5 100644 --- a/MetaMorpheus/Test/DictionaryExtensionsTests.cs +++ b/MetaMorpheus/Test/DictionaryExtensionsTests.cs @@ -144,7 +144,7 @@ public void Increment_ThreadSafeWithConcurrentDictionary() // Act for (int i = 0; i < 1000; i++) { - tasks.Add(Task.Run(() => dictionary.Increment("key1"))); + tasks.Add(Task.Run(() => dictionary.Increment("key1", 1))); } Task.WaitAll(tasks.ToArray()); @@ -152,6 +152,88 @@ public void Increment_ThreadSafeWithConcurrentDictionary() Assert.That(dictionary["key1"], Is.EqualTo(1000)); } + [Test] + public void Increment_IncrementsBySpecifiedValue() + { + // Arrange + var dictionary = new Dictionary + { + { "key1", 1 } + }; + + // Act + dictionary.Increment("key1", 5); + + // Assert + Assert.That(dictionary["key1"], Is.EqualTo(6)); + } + + [Test] + public void Increment_AddsNewKeyWithSpecifiedValue() + { + // Arrange + var dictionary = new Dictionary(); + + // Act + dictionary.Increment("key1", 5); + + // Assert + Assert.That(dictionary.ContainsKey("key1")); + Assert.That(dictionary["key1"], Is.EqualTo(5)); + } + + [Test] + public void Increment_IncrementsBySpecifiedValueMultipleTimes() + { + // Arrange + var dictionary = new Dictionary + { + { "key1", 1 } + }; + + // Act + dictionary.Increment("key1", 2); + dictionary.Increment("key1", 3); + dictionary.Increment("key1", 4); + + // Assert + Assert.That(dictionary["key1"], 
Is.EqualTo(10)); + } + + [Test] + public void Increment_AddsAndIncrementsNewKeyBySpecifiedValue() + { + // Arrange + var dictionary = new Dictionary(); + + // Act + dictionary.Increment("key1", 2); + dictionary.Increment("key1", 3); + + // Assert + Assert.That(dictionary.ContainsKey("key1")); + Assert.That(dictionary["key1"], Is.EqualTo(5)); + } + + [Test] + public void Increment_ThreadSafeWithConcurrentDictionaryBySpecifiedValue() + { + // Arrange + var dictionary = new ConcurrentDictionary(); + var tasks = new List(); + + // Act + for (int i = 0; i < 1000; i++) + { + int value = i % 10 + 1; // Increment by values from 1 to 10 + tasks.Add(Task.Run(() => dictionary.Increment("key1", value))); + } + Task.WaitAll(tasks.ToArray()); + + // Assert + Assert.That(dictionary["key1"], Is.EqualTo(5500)); + } + [Test] public void AddOrCreate_ThreadSafeWithConcurrentDictionary() { From 5d67fd454be6c506b1daab51584d086c473c5b7c Mon Sep 17 00:00:00 2001 From: nbollis Date: Thu, 30 Jan 2025 16:30:28 -0600 Subject: [PATCH 16/16] Update digestion count tracking and file output format Updated `DigestionCountDictionary` to track by protein accession and base sequence. Modified `PostSearchAnalysisTask` and `SearchTask` to use the new type. Updated file output logic to include primary sequence and added checks for `WriteDecoys` parameter. Enhanced unit tests to reflect these changes and added new tests for decoy handling. 
--- .../ClassicSearch/ClassicSearchEngine.cs | 6 +- .../SearchTask/PostSearchAnalysisTask.cs | 12 +- .../TaskLayer/SearchTask/SearchTask.cs | 9 +- .../Test/PostSearchAnalysisTaskTests.cs | 130 +++++++++++++++--- 4 files changed, 128 insertions(+), 29 deletions(-) diff --git a/MetaMorpheus/EngineLayer/ClassicSearch/ClassicSearchEngine.cs b/MetaMorpheus/EngineLayer/ClassicSearch/ClassicSearchEngine.cs index 733b234f0..d4fd987d1 100644 --- a/MetaMorpheus/EngineLayer/ClassicSearch/ClassicSearchEngine.cs +++ b/MetaMorpheus/EngineLayer/ClassicSearch/ClassicSearchEngine.cs @@ -27,7 +27,7 @@ public class ClassicSearchEngine : MetaMorpheusEngine private readonly bool WriteSpectralLibrary; private readonly bool WriteDigestionCounts; private readonly object[] Locks; - public readonly ConcurrentDictionary DigestionCountDictionary; // Used to track the amount of digestion products from each protein when the option is enabled. + public readonly ConcurrentDictionary<(string Accession, string BaseSequence), int> DigestionCountDictionary; // Used to track the amount of digestion products from each protein when the option is enabled. 
public ClassicSearchEngine(SpectralMatch[] globalPsms, Ms2ScanWithSpecificMass[] arrayOfSortedMS2Scans, List variableModifications, List fixedModifications, List silacLabels, SilacLabel startLabel, SilacLabel endLabel, @@ -112,8 +112,8 @@ protected override MetaMorpheusEngineResults RunSpecific() foreach (PeptideWithSetModifications peptide in Proteins[i].Digest(CommonParameters.DigestionParams, FixedModifications, VariableModifications, SilacLabels, TurnoverLabels)) { if (WriteDigestionCounts) - DigestionCountDictionary.Increment(peptide.Parent.Accession); - + DigestionCountDictionary.Increment((peptide.Parent.Accession, peptide.BaseSequence)); + PeptideWithSetModifications reversedOnTheFlyDecoy = null; if (SpectralLibrary != null) diff --git a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs index 763748f8f..a5e789fff 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs @@ -38,7 +38,7 @@ public class PostSearchAnalysisTask : MetaMorpheusTask /// /// Used for storage of results for writing digestion product counts to a .tsv. /// - internal IDictionary? DigestionCountDictionary { get; set; } + internal IDictionary<(string Accession, string BaseSeqeunce), int> DigestionCountDictionary { get; set; } public PostSearchAnalysisTask() : base(MyTask.Search) { @@ -113,7 +113,7 @@ public MyTaskResults Run() UpdateSpectralLibrary(); } - if (DigestionCountDictionary != null && DigestionCountDictionary.Any()) // Will be null or empty if no digestion count output file is desired. 
+ if (Parameters.SearchParameters.WriteDigestionProductCountFile) { WriteDigestionCountByProtein(); WriteDigestionCountHistogram(); @@ -1963,10 +1963,12 @@ private void WriteDigestionCountByProtein() // write all values to file using (var writer = new StreamWriter(countByProteinPath)) { - writer.WriteLine("Protein Accession\tDigestion Products"); + writer.WriteLine("Protein Accession\tPrimary Sequence\tDigestion Products"); foreach (var proteinEntry in DigestionCountDictionary!) { - writer.WriteLine($"{proteinEntry.Key}\t{proteinEntry.Value}"); + if (!Parameters.SearchParameters.WriteDecoys && proteinEntry.Key.Accession.StartsWith("DECOY")) + continue; + writer.WriteLine($"{proteinEntry.Key.Accession}\t{proteinEntry.Key.BaseSeqeunce}\t{proteinEntry.Value}"); } } FinishedWritingFile(countByProteinPath, nestedIds); @@ -1987,6 +1989,8 @@ private void WriteDigestionCountHistogram() var countDictionary = new Dictionary(CommonParameters.DigestionParams.MaxModificationIsoforms); foreach (var proteinEntry in DigestionCountDictionary!) 
{ + if (!Parameters.SearchParameters.WriteDecoys && proteinEntry.Key.Accession.StartsWith("DECOY")) + continue; countDictionary.Increment(proteinEntry.Value); } diff --git a/MetaMorpheus/TaskLayer/SearchTask/SearchTask.cs b/MetaMorpheus/TaskLayer/SearchTask/SearchTask.cs index 07441761d..92cb59e3e 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/SearchTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/SearchTask.cs @@ -203,7 +203,8 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List { taskId, "Individual Spectra Files" }); Dictionary numMs2SpectraPerFile = new Dictionary(); - IDictionary digestionCountDictionary = null; + bool collectedDigestionInformation = false; + IDictionary<(string Accession, string BaseSequence), int> digestionCountDictionary = null; for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++) { if (GlobalVariables.StopLoops) { break; } @@ -379,9 +380,9 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List + var digestionCountDictionary = new Dictionary<(string Accession, string BaseSeqeunce), int> { - { "Protein1", 5 }, - { "Protein2", 10 } + { ("Protein1", "SEQUENCE1"), 5 }, + { ("Protein2", "SEQUENCE2"), 10 } }; task.GetType().GetProperty("DigestionCountDictionary", BindingFlags.NonPublic | BindingFlags.Instance).SetValue(task, digestionCountDictionary); @@ -280,9 +281,9 @@ public static void WriteDigestionCountsByProtein_WritesCorrectFile() var lines = File.ReadAllLines(expectedFilePath); Assert.That(lines.Length, Is.EqualTo(3)); - Assert.That(lines[0], Is.EqualTo("Protein Accession\tDigestion Products")); - Assert.That(lines[1], Is.EqualTo("Protein1\t5")); - Assert.That(lines[2], Is.EqualTo("Protein2\t10")); + Assert.That(lines[0], Is.EqualTo("Protein Accession\tPrimary Sequence\tDigestion Products")); + Assert.That(lines[1], Is.EqualTo("Protein1\tSEQUENCE1\t5")); + Assert.That(lines[2], Is.EqualTo("Protein2\tSEQUENCE2\t10")); // Cleanup 
Directory.Delete(parameters.OutputFolder, true); @@ -299,15 +300,16 @@ public static void WriteDigestionCountsHistogram_WritesCorrectFile() Directory.CreateDirectory(outputDirectory); var parameters = new PostSearchAnalysisParameters { + SearchParameters = new(), OutputFolder = outputDirectory, SearchTaskId = "TestTask" }; task.GetType().GetProperty("Parameters").SetValue(task, parameters); - var digestionCountDictionary = new Dictionary + var digestionCountDictionary = new Dictionary<(string Accession, string BaseSeqeunce), int> { - { "Protein1", 5 }, - { "Protein2", 10 }, - { "Protein3", 5 } + { ("Protein1", "SEQUENCE1"), 5 }, + { ("Protein2", "SEQUENCE2"), 10 }, + { ("Protein3", "SEQUENCE3"), 5 } }; task.GetType().GetProperty("DigestionCountDictionary", BindingFlags.NonPublic | BindingFlags.Instance).SetValue(task, digestionCountDictionary); @@ -325,7 +327,7 @@ public static void WriteDigestionCountsHistogram_WritesCorrectFile() Assert.That(lines[2], Is.EqualTo("10\t1")); // Cleanup - Directory.Delete(parameters.OutputFolder, true); + Directory.Delete(outputDirectory, true); } public record DigestionCountTestCase(string DbPath, int MaxIsoforms, bool UseVariableMods, string Name) @@ -423,22 +425,26 @@ public static void WriteDigestionCountFiles_IsCorrectFromSearchTask(DigestionCou // Pull Results from files and calculate from digestion var proteins = ProteinDbLoader.LoadProteinFasta(Path.Combine(TestContext.CurrentContext.TestDirectory, testCase.DbPath), true, DecoyType.None, false, out var errors); - var digestionResults = proteins.ToDictionary(p => p.Accession, p => p.Digest(digestionParams, [], variableModifications).ToList()); - var digestionHistResults = digestionResults.GroupBy(p => p.Value.Count).ToDictionary(p => p.Key, p => p.Count()); + var digestionResults = proteins.SelectMany(p => p.Digest(digestionParams, [], variableModifications)) + .GroupBy(p => (p.Parent.Accession, p.BaseSequence)) + .ToDictionary(p => p.Key, p => p.ToArray()); + var 
digestionHistResults = digestionResults.GroupBy(p => p.Value.Length) + .ToDictionary(p => p.Key, p => p.Count()); var byProteinLines = File.ReadAllLines(Path.Combine(outDirectory, searchTaskId, "DigestionCountsByProteins.tsv")); var histogramLines = File.ReadAllLines(Path.Combine(outDirectory, searchTaskId, "DigestionCountHistogram.tsv")); // Assert - Assert.That(byProteinLines.Length, Is.EqualTo(proteins.Count + 1)); + Assert.That(byProteinLines.Length, Is.EqualTo(digestionResults.Count + 1)); for (int i = 1; i < byProteinLines.Length; i++) { var split = byProteinLines[i].Split('\t'); - Assert.That(split.Length, Is.EqualTo(2)); + Assert.That(split.Length, Is.EqualTo(3)); var writtenAccession = split[0]; - var writtenCount = int.Parse(split[1]); + var writtenSequence = split[1]; + var writtenCount = int.Parse(split[2]); - Assert.That(writtenCount, Is.EqualTo(digestionResults[writtenAccession].Count)); + Assert.That(writtenCount, Is.EqualTo(digestionResults[(writtenAccession, writtenSequence)].Length)); } Assert.That(histogramLines.Length, Is.EqualTo(digestionHistResults.Count + 1)); @@ -456,5 +462,93 @@ public static void WriteDigestionCountFiles_IsCorrectFromSearchTask(DigestionCou // Cleanup Directory.Delete(outDirectory, true); } + [Test] + public static void WriteDigestionCountFiles_DoesNotIncludeDecoys_WhenNotIntended() + { + // Arrange + var task = new PostSearchAnalysisTask(); + var outputDirectory = Path.Combine(TestContext.CurrentContext.WorkDirectory, "DigestionCountTest"); + if (Directory.Exists(outputDirectory)) + Directory.Delete(outputDirectory, true); + Directory.CreateDirectory(outputDirectory); + var parameters = new PostSearchAnalysisParameters + { + OutputFolder = outputDirectory, + SearchTaskId = "TestTask", + SearchParameters = new SearchParameters + { + WriteDecoys = false + } + }; + + task.GetType().GetProperty("Parameters").SetValue(task, parameters); + var digestionCountDictionary = new Dictionary<(string Accession, string BaseSeqeunce), 
int> + { + { ("DECOY_Protein1", "SEQUENCE1"), 5 }, + { ("Protein2", "SEQUENCE2"), 10 } + }; + task.GetType().GetProperty("DigestionCountDictionary", BindingFlags.NonPublic | BindingFlags.Instance).SetValue(task, digestionCountDictionary); + + // Act + var method = task.GetType().GetMethod("WriteDigestionCountByProtein", BindingFlags.NonPublic | BindingFlags.Instance); + method!.Invoke(task, null); + + // Assert + var expectedFilePath = Path.Combine(parameters.OutputFolder, "DigestionCountsByProteins.tsv"); + Assert.That(File.Exists(expectedFilePath), Is.True); + + var lines = File.ReadAllLines(expectedFilePath); + Assert.That(lines.Length, Is.EqualTo(2)); + Assert.That(lines[0], Is.EqualTo("Protein Accession\tPrimary Sequence\tDigestion Products")); + Assert.That(lines[1], Is.EqualTo("Protein2\tSEQUENCE2\t10")); + + // Cleanup + Directory.Delete(parameters.OutputFolder, true); + } + + [Test] + public static void WriteDigestionCountFiles_IncludesDecoys_WhenIntended() + { + // Arrange + var task = new PostSearchAnalysisTask(); + var outputDirectory = Path.Combine(TestContext.CurrentContext.WorkDirectory, "DigestionCountTest"); + if (Directory.Exists(outputDirectory)) + Directory.Delete(outputDirectory, true); + Directory.CreateDirectory(outputDirectory); + var parameters = new PostSearchAnalysisParameters + { + OutputFolder = outputDirectory, + SearchTaskId = "TestTask", + SearchParameters = new SearchParameters + { + WriteDecoys = true + } + }; + + task.GetType().GetProperty("Parameters").SetValue(task, parameters); + var digestionCountDictionary = new Dictionary<(string Accession, string BaseSeqeunce), int> + { + { ("DECOY_Protein1", "SEQUENCE1"), 5 }, + { ("Protein2", "SEQUENCE2"), 10 } + }; + task.GetType().GetProperty("DigestionCountDictionary", BindingFlags.NonPublic | BindingFlags.Instance).SetValue(task, digestionCountDictionary); + + // Act + var method = task.GetType().GetMethod("WriteDigestionCountByProtein", BindingFlags.NonPublic | 
BindingFlags.Instance); + method!.Invoke(task, null); + + // Assert + var expectedFilePath = Path.Combine(parameters.OutputFolder, "DigestionCountsByProteins.tsv"); + Assert.That(File.Exists(expectedFilePath), Is.True); + + var lines = File.ReadAllLines(expectedFilePath); + Assert.That(lines.Length, Is.EqualTo(3)); + Assert.That(lines[0], Is.EqualTo("Protein Accession\tPrimary Sequence\tDigestion Products")); + Assert.That(lines[1], Is.EqualTo("DECOY_Protein1\tSEQUENCE1\t5")); + Assert.That(lines[2], Is.EqualTo("Protein2\tSEQUENCE2\t10")); + + // Cleanup + Directory.Delete(parameters.OutputFolder, true); + } } } \ No newline at end of file