Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optional Digestion Count Output #2460

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions MetaMorpheus/EngineLayer/ClassicSearch/ClassicSearchEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using System.Threading;
using Omics.Modifications;
using System.Collections.Concurrent;

Expand All @@ -26,12 +25,14 @@ public class ClassicSearchEngine : MetaMorpheusEngine
private readonly Ms2ScanWithSpecificMass[] ArrayOfSortedMS2Scans;
private readonly double[] MyScanPrecursorMasses;
private readonly bool WriteSpectralLibrary;
private readonly bool WriteDigestionCounts;
private readonly object[] Locks;
public readonly ConcurrentDictionary<(string Accession, string BaseSequence), int> DigestionCountDictionary; // Used to track the amount of digestion products from each protein when the option is enabled.

public ClassicSearchEngine(SpectralMatch[] globalPsms, Ms2ScanWithSpecificMass[] arrayOfSortedMS2Scans,
List<Modification> variableModifications, List<Modification> fixedModifications, List<SilacLabel> silacLabels, SilacLabel startLabel, SilacLabel endLabel,
List<Protein> proteinList, MassDiffAcceptor searchMode, CommonParameters commonParameters, List<(string FileName, CommonParameters Parameters)> fileSpecificParameters,
SpectralLibrary spectralLibrary, List<string> nestedIds, bool writeSpectralLibrary)
SpectralLibrary spectralLibrary, List<string> nestedIds, bool writeSpectralLibrary, bool writeDigestionCounts = false)
: base(commonParameters, fileSpecificParameters, nestedIds)
{
PeptideSpectralMatches = globalPsms;
Expand All @@ -48,6 +49,8 @@ public ClassicSearchEngine(SpectralMatch[] globalPsms, Ms2ScanWithSpecificMass[]
SearchMode = searchMode;
SpectralLibrary = spectralLibrary;
WriteSpectralLibrary = writeSpectralLibrary;
WriteDigestionCounts = writeDigestionCounts;
DigestionCountDictionary = new();

// Create one lock for each PSM to ensure thread safety
Locks = new object[PeptideSpectralMatches.Length];
Expand Down Expand Up @@ -108,6 +111,9 @@ protected override MetaMorpheusEngineResults RunSpecific()
// digest each protein into peptides and search for each peptide in all spectra within precursor mass tolerance
foreach (PeptideWithSetModifications peptide in Proteins[i].Digest(CommonParameters.DigestionParams, FixedModifications, VariableModifications, SilacLabels, TurnoverLabels))
{
if (WriteDigestionCounts)
DigestionCountDictionary.Increment((peptide.Parent.Accession, peptide.BaseSequence));

PeptideWithSetModifications reversedOnTheFlyDecoy = null;

if (SpectralLibrary != null)
Expand Down
121 changes: 121 additions & 0 deletions MetaMorpheus/EngineLayer/Util/DictionaryExtensions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
using Nett;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Numerics;

namespace EngineLayer;

/// <summary>
/// Extension methods for the common "add to a bucket" and "bump a counter" dictionary patterns,
/// with variants that are safe to call from multiple threads.
/// </summary>
public static class DictionaryExtensions
{
    /// <summary>
    /// Adds a value to the list associated with the specified key in the dictionary.
    /// If the key does not exist, a new list is created with the value and added to the dictionary.
    /// </summary>
    /// <typeparam name="TKey">The type of the keys in the dictionary.</typeparam>
    /// <typeparam name="TValues">The type of the values in the lists.</typeparam>
    /// <param name="dictionary">The dictionary to operate on.</param>
    /// <param name="key">The key whose value list to add to or create.</param>
    /// <param name="value">The value to add to the list associated with the specified key.</param>
    /// <remarks>
    /// This is not thread-safe! Use <see cref="AddOrCreateThreadSafe{TKey, TValues}"/> when several threads may write.
    /// </remarks>
    public static void AddOrCreate<TKey, TValues>(this IDictionary<TKey, IList<TValues>> dictionary, TKey key, TValues value)
    {
        if (dictionary.TryGetValue(key, out IList<TValues> values))
        {
            values.Add(value);
        }
        else
        {
            dictionary.Add(key, new List<TValues> { value });
        }
    }

    /// <summary>
    /// Adds a value to the list associated with the specified key in the dictionary.
    /// If the key does not exist, a new list is created with the value and added to the dictionary.
    /// </summary>
    /// <typeparam name="TKey">The type of the keys in the dictionary.</typeparam>
    /// <typeparam name="TValues">The type of the values in the lists.</typeparam>
    /// <param name="dictionary">The dictionary to operate on.</param>
    /// <param name="key">The key whose value list to add to or create.</param>
    /// <param name="value">The value to add to the list associated with the specified key.</param>
    /// <param name="lockObject">
    /// Object used to lock this specific dictionary. Every caller writing to the same dictionary must pass
    /// the same instance, and it must not be null.
    /// </param>
    /// <remarks>
    /// This is thread safe for all dictionary types, provided all writers share <paramref name="lockObject"/>.
    /// </remarks>
    public static void AddOrCreateThreadSafe<TKey, TValues>(this IDictionary<TKey, IList<TValues>> dictionary, TKey key, TValues value, object lockObject)
    {
        if (dictionary is ConcurrentDictionary<TKey, IList<TValues>> concurrentDictionary)
        {
            // The concurrent dictionary only protects its own key/value slots, not the lists stored in them.
            // GetOrAdd is given a side-effect-free factory because the dictionary may invoke its delegates
            // more than once (and outside of its internal locks); any surplus empty list is simply discarded.
            IList<TValues> values = concurrentDictionary.GetOrAdd(key, _ => new List<TValues>());

            // List<T>.Add is not thread safe, so the mutation of the inner list must be guarded explicitly.
            lock (lockObject)
            {
                values.Add(value);
            }
        }
        else
        {
            // A plain dictionary offers no thread safety at all, so lookup and insert happen under one lock.
            lock (lockObject)
            {
                if (dictionary.TryGetValue(key, out IList<TValues> values))
                {
                    values.Add(value);
                }
                else
                {
                    dictionary.Add(key, new List<TValues> { value });
                }
            }
        }
    }

    /// <summary>
    /// Increments the value associated with the specified key in the dictionary by a defined number with a default of one.
    /// If the key does not exist, a new entry is created with the value set to the increment amount.
    /// </summary>
    /// <typeparam name="TKey">The type of the keys in the dictionary.</typeparam>
    /// <typeparam name="TValue">The type of the values in the dictionary, which must implement <see cref="INumber{TValue}"/>.</typeparam>
    /// <param name="dictionary">The dictionary to operate on.</param>
    /// <param name="key">The key whose value to increment or create.</param>
    /// <param name="incrementBy">
    /// The amount to increment by. Leaving it at its default (zero) increments by one; as a consequence an
    /// explicit zero is also treated as one.
    /// </param>
    /// <remarks>
    /// The update is atomic per key when <paramref name="dictionary"/> is a <see cref="ConcurrentDictionary{TKey, TValue}"/>.
    /// For any other dictionary type this method is not thread-safe.
    /// </remarks>
    public static void Increment<TKey, TValue>(this IDictionary<TKey, TValue> dictionary, TKey key, TValue incrementBy = default)
        where TValue : INumber<TValue>
    {
        // A generic optional parameter can only default to default(TValue), i.e. zero, so zero stands in for "one".
        TValue incrementValue = incrementBy.Equals(default(TValue)) ? TValue.One : incrementBy;

        if (dictionary is ConcurrentDictionary<TKey, TValue> concurrentDictionary)
        {
            // The update delegate is pure, so it is safe for the dictionary to retry it under contention.
            concurrentDictionary.AddOrUpdate(key, incrementValue, (k, v) => v + incrementValue);
        }
        else
        {
            if (dictionary.TryGetValue(key, out TValue value))
            {
                dictionary[key] = value + incrementValue;
            }
            else
            {
                dictionary.Add(key, incrementValue);
            }
        }
    }

    /// <summary>
    /// Determines whether the dictionary is null or has no elements.
    /// </summary>
    /// <typeparam name="TKey">The type of the keys in the dictionary.</typeparam>
    /// <typeparam name="TValue">The type of the values in the dictionary.</typeparam>
    /// <param name="dictionary">The dictionary to check.</param>
    /// <returns>
    /// <c>true</c> if the dictionary is null or has no elements; otherwise, <c>false</c>.
    /// </returns>
    public static bool IsNullOrEmpty<TKey, TValue>(this IDictionary<TKey, TValue> dictionary)
    {
        return dictionary == null || dictionary.Count == 0;
    }
}
12 changes: 12 additions & 0 deletions MetaMorpheus/GUI/TaskWindows/SearchTaskWindow.xaml
Original file line number Diff line number Diff line change
Expand Up @@ -1171,6 +1171,18 @@
</TextBlock>
</ToolTipService.ToolTip>
</CheckBox>
<CheckBox x:Name="WriteDigestCountCheckBox" Margin="20 0 0 0"
Content="Write digestion product count histogram" IsEnabled="{Binding IsChecked, ElementName=ClassicSearchRadioButton}">
<ToolTipService.ToolTip>
<TextBlock>
Checking this box will create an additional output file with a histogram of the number of digestion products per protein.
<LineBreak/>
The number of digestion products is limited by the MaxModsPerPeptide parameter and limited to the MaxModifiedIsoforms parameter for each primary sequence generated by each database entry
<LineBreak/>
Multiple base sequences can be generated per protein due to variable methionine and splice variants if annotated in a database
</TextBlock>
</ToolTipService.ToolTip>
</CheckBox>
</StackPanel>
</Expander>
</GroupBox>
Expand Down
2 changes: 2 additions & 0 deletions MetaMorpheus/GUI/TaskWindows/SearchTaskWindow.xaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@ private void UpdateFieldsFromTask(SearchTask task)

OutputFileNameTextBox.Text = task.CommonParameters.TaskDescriptor;
CkbMzId.IsChecked = task.SearchParameters.WriteMzId;
WriteDigestCountCheckBox.IsChecked = task.SearchParameters.WriteDigestionProductCountFile;
WriteHighQPsmsCheckBox.IsChecked = task.SearchParameters.WriteHighQValuePsms;
WriteDecoyCheckBox.IsChecked = task.SearchParameters.WriteDecoys;
WriteContaminantCheckBox.IsChecked = task.SearchParameters.WriteContaminants;
Expand Down Expand Up @@ -650,6 +651,7 @@ private void SaveButton_Click(object sender, RoutedEventArgs e)
TheTask.SearchParameters.UpdateSpectralLibrary = UpdateSpectralLibraryCheckBox.IsChecked.Value;
TheTask.SearchParameters.CompressIndividualFiles = CompressIndividualResultsCheckBox.IsChecked.Value;
TheTask.SearchParameters.IncludeModMotifInMzid = IncludeMotifInModNamesCheckBox.IsChecked.Value;
TheTask.SearchParameters.WriteDigestionProductCountFile = WriteDigestCountCheckBox.IsChecked.Value;

if (RemoveContaminantRadioBox.IsChecked.Value)
{
Expand Down
68 changes: 67 additions & 1 deletion MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,10 @@
/// Used for storage of results for writing to Results.tsv. It is explained in the method ConstructResultsDictionary()
/// </summary>
private Dictionary<(string,string),string> ResultsDictionary { get; set; }

/// <summary>
/// Used for storage of results for writing digestion product counts to a .tsv.
/// Keys pair a protein accession with a base (primary) sequence; the value is the number of
/// digestion products counted for that pair.
/// </summary>
/// <remarks>
/// May be null or empty, in which case the digestion count writers return without producing a file.
/// NOTE(review): the tuple element name "BaseSeqeunce" is misspelled; renaming it also requires
/// updating every place that reads it by name.
/// </remarks>
internal IDictionary<(string Accession, string BaseSeqeunce), int> DigestionCountDictionary { get; set; }
public PostSearchAnalysisTask()
: base(MyTask.Search)
{
Expand Down Expand Up @@ -110,6 +113,12 @@
UpdateSpectralLibrary();
}

if (Parameters.SearchParameters.WriteDigestionProductCountFile)
{
WriteDigestionCountByProtein();
WriteDigestionCountHistogram();
}

WriteFlashLFQResults();

if (Parameters.ProteinList.Any((p => p.AppliedSequenceVariations.Count > 0)))
Expand Down Expand Up @@ -1939,5 +1948,62 @@

FinishedWritingFile(peaksPath, nestedIds);
}

/// <summary>
/// Writes the digestion product counts for each protein to a .tsv file.
/// One row is written per (accession, primary sequence) entry of <see cref="DigestionCountDictionary"/>.
/// Entries whose accession starts with "DECOY" are skipped unless decoys are being written.
/// Does nothing when the dictionary is null or empty.
/// </summary>
private void WriteDigestionCountByProtein()
{
    if (DigestionCountDictionary.IsNullOrEmpty())
        return;

    var nestedIds = new List<string> { Parameters.SearchTaskId };
    var countByProteinPath = Path.Combine(Parameters.OutputFolder, $"DigestionCountsBy{GlobalVariables.AnalyteType.GetBioPolymerLabel()}s.tsv");

    // The decoy setting cannot change while writing, so read it once instead of once per entry.
    bool writeDecoys = Parameters.SearchParameters.WriteDecoys;

    // write all values to file
    using (var writer = new StreamWriter(countByProteinPath))
    {
        writer.WriteLine("Protein Accession\tPrimary Sequence\tDigestion Products");
        foreach (var proteinEntry in DigestionCountDictionary!)
        {
            // Accessions are identifiers, not linguistic text, so the prefix check must be ordinal
            // rather than dependent on the current culture.
            if (!writeDecoys && proteinEntry.Key.Accession.StartsWith("DECOY", System.StringComparison.Ordinal))
                continue;

            writer.WriteLine($"{proteinEntry.Key.Accession}\t{proteinEntry.Key.BaseSeqeunce}\t{proteinEntry.Value}");
        }
    }
    FinishedWritingFile(countByProteinPath, nestedIds);
}

/// <summary>
/// Writes a histogram of digestion product counts to a .tsv file.
/// Each row pairs a digestion product count with the number of <see cref="DigestionCountDictionary"/>
/// entries that have exactly that count, ordered by ascending count.
/// Entries whose accession starts with "DECOY" are skipped unless decoys are being written.
/// Does nothing when the dictionary is null or empty.
/// </summary>
private void WriteDigestionCountHistogram()
{
    if (DigestionCountDictionary.IsNullOrEmpty())
        return;

    var nestedIds = new List<string> { Parameters.SearchTaskId };
    var countHistogramPath = Path.Combine(Parameters.OutputFolder, "DigestionCountHistogram.tsv");

    // The decoy setting cannot change while tallying, so read it once instead of once per entry.
    bool writeDecoys = Parameters.SearchParameters.WriteDecoys;

    // Create Histogram: key = number of digestion products, value = number of entries with that many products
    var countDictionary = new Dictionary<int, int>(CommonParameters.DigestionParams.MaxModificationIsoforms);
    foreach (var proteinEntry in DigestionCountDictionary!)
    {
        // Accessions are identifiers, not linguistic text, so the prefix check must be ordinal
        // rather than dependent on the current culture.
        if (!writeDecoys && proteinEntry.Key.Accession.StartsWith("DECOY", System.StringComparison.Ordinal))
            continue;

        countDictionary.Increment(proteinEntry.Value);
    }

    // Write Histogram
    using (StreamWriter writer = new(countHistogramPath))
    {
        writer.WriteLine($"Digestion Products\tCount of {GlobalVariables.AnalyteType.GetBioPolymerLabel()}s");
        foreach (var count in countDictionary.OrderBy(p => p.Key))
        {
            writer.WriteLine($"{count.Key}\t{count.Value}");
        }
    }
    FinishedWritingFile(countHistogramPath, nestedIds);
}
}
}
3 changes: 2 additions & 1 deletion MetaMorpheus/TaskLayer/SearchTask/SearchParameters.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
using UsefulProteomicsDatabases;
using EngineLayer;
using Omics.Modifications;
using Proteomics;

namespace TaskLayer
{
Expand Down Expand Up @@ -32,6 +31,7 @@ public SearchParameters()
WriteMzId = true;
WritePepXml = false;
IncludeModMotifInMzid = false;
WriteDigestionProductCountFile = false;

ModsToWriteSelection = new Dictionary<string, int>
{
Expand Down Expand Up @@ -103,5 +103,6 @@ public SearchParameters()
public SilacLabel EndTurnoverLabel { get; set; } //used for SILAC turnover experiments
public TargetContaminantAmbiguity TCAmbiguity { get; set; }
public bool IncludeModMotifInMzid { get; set; }
public bool WriteDigestionProductCountFile { get; set; }
}
}
16 changes: 13 additions & 3 deletions MetaMorpheus/TaskLayer/SearchTask/SearchTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,8 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask
Status("Searching files...", new List<string> { taskId, "Individual Spectra Files" });

Dictionary<string, int[]> numMs2SpectraPerFile = new Dictionary<string, int[]>();
bool collectedDigestionInformation = false;
IDictionary<(string Accession, string BaseSequence), int> digestionCountDictionary = null;
for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
{
if (GlobalVariables.StopLoops) { break; }
Expand Down Expand Up @@ -374,8 +376,15 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask
{
Status("Starting search...", thisId);
var newClassicSearchEngine = new ClassicSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, SearchParameters.SilacLabels,
SearchParameters.StartTurnoverLabel, SearchParameters.EndTurnoverLabel, proteinList, massDiffAcceptor, combinedParams, this.FileSpecificParameters, spectralLibrary, thisId,SearchParameters.WriteSpectralLibrary);
newClassicSearchEngine.Run();
SearchParameters.StartTurnoverLabel, SearchParameters.EndTurnoverLabel, proteinList, massDiffAcceptor, combinedParams, this.FileSpecificParameters, spectralLibrary, thisId,SearchParameters.WriteSpectralLibrary, SearchParameters.WriteDigestionProductCountFile);
var result = newClassicSearchEngine.Run();

// The same proteins (all of them) get digested with each classic search engine, therefore we only need to calculate this for the first file that runs
if (!collectedDigestionInformation)
{
collectedDigestionInformation = true;
digestionCountDictionary = (result.MyEngine as ClassicSearchEngine).DigestionCountDictionary;
}

ReportProgress(new ProgressEventArgs(100, "Done with search!", thisId));
}
Expand Down Expand Up @@ -447,7 +456,8 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask
{
Parameters = parameters,
FileSpecificParameters = this.FileSpecificParameters,
CommonParameters = CommonParameters
CommonParameters = CommonParameters,
DigestionCountDictionary = digestionCountDictionary
};
return postProcessing.Run();
}
Expand Down
Loading