diff --git a/mzLib/MzLibUtil/TreeDictionary.cs b/mzLib/MzLibUtil/TreeDictionary.cs
new file mode 100644
index 000000000..3f9c642dc
--- /dev/null
+++ b/mzLib/MzLibUtil/TreeDictionary.cs
@@ -0,0 +1,327 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.ComponentModel;
+using System.Linq;
+using System.Runtime.CompilerServices;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace MzLibUtil
+{
+ // Implemented as a basic binary search tree, where nodes are ordered according to
+ // a key double (an m/z value) and a value (the corresponding intensity value)
+ // This affords efficient search, deletion, and insertion ( O(logn) )
+ // Keeping the "key" as a double allows operations such as FindNearest
+ public class SpectrumTree : IEnumerable
+ {
+ private Node _root;
+ public Node Root => _root;
+
+ public SpectrumTree()
+ {
+ _root = null;
+ }
+
+ ///
+ /// Create a new SpectrumTree object from m/z and intensity arrays
+ ///
+ ///
+ ///
+ public SpectrumTree(double[] mzArray, double[] intensityArray)
+ {
+ _root = null;
+ BuildTreeFromSpectrum(mzArray, intensityArray);
+ }
+
+ ///
+ /// Searches the spectrum tree for the closest peak to a target m/z
+ /// If the closest peak is within the given ppmTolerance of the target,
+ /// it is removed, its m/z and intensity are written to the respective
+ /// out variables
+ ///
+ /// m/z to be searched for
+ ///
+ /// the m/z of the closest peak
+ /// the intensity of the closest peak
+ /// True if the closest peak is within tolerance, false otherwise
+ public bool PopClosestPeak(double targetMz, PpmTolerance ppmTolerance,
+ out double peakMz, out double peakIntensity)
+ {
+ Node closestNode = FindNearest(targetMz);
+ if (closestNode != null && ppmTolerance.Within(targetMz, closestNode.Key))
+ {
+ peakMz = closestNode.Key;
+ peakIntensity = closestNode.Value;
+ Delete(closestNode);
+ return true;
+ }
+
+ peakMz = 0;
+ peakIntensity = 0;
+ return false;
+ }
+
+ ///
+ /// This function assumes, but does not require, that the mzArray is ordered.
+ /// For ordered arrays, creates a balanced binary tree
+ /// Unordered arrays are fine, but then the resulting tree will not be balanced
+ ///
+ ///
+ ///
+ public void BuildTreeFromSpectrum(double[] mzArray, double[] intensityArray)
+ {
+ BuildTreeHelper(mzArray, intensityArray, 0, mzArray.Length - 1);
+ }
+
+ private void BuildTreeHelper(double[] mzArray, double[] intensityArray, int minIndex, int maxIndex)
+ {
+ // Base case - the array cannot be subdivided further
+ if (minIndex == maxIndex)
+ {
+ Insert(new Node(mzArray[minIndex], intensityArray[minIndex]));
+ return;
+ }
+ if (maxIndex < minIndex) return; // Other base case, happens on right hand side of subsection
+
+ int midIndex = minIndex + (maxIndex - minIndex) / 2;
+ Insert(new Node(mzArray[midIndex], intensityArray[midIndex]));
+ BuildTreeHelper(mzArray, intensityArray, minIndex, midIndex - 1);
+ BuildTreeHelper(mzArray, intensityArray, midIndex + 1, maxIndex);
+ }
+
+ public void Insert(Node newNode)
+ {
+ if (Root == null)
+ {
+ _root = newNode;
+ return;
+ }
+
+ Node y = null;
+ Node x = Root;
+ while (x != null)
+ {
+ y = x;
+ x = newNode.Key < x.Key ? x.leftChild : x.rightChild;
+ }
+ newNode.parent = y;
+
+ if (newNode.Key < y.Key)
+ {
+ y.leftChild = newNode;
+ }
+ else
+ {
+ y.rightChild = newNode;
+ }
+ }
+
+ public void Delete(Node node)
+ {
+ if (node == _root)
+ {
+ if (node.leftChild == null & node.rightChild == null)
+ {
+ _root = null;
+ return;
+ }
+ // One child deletion
+ if (node.leftChild == null & node.rightChild != null)
+ {
+ _root = node.rightChild;
+ node.rightChild.parent = null;
+ return;
+ }
+ if (node.leftChild != null & node.rightChild == null)
+ {
+ _root = node.leftChild;
+ node.leftChild.parent = null;
+ return;
+ }
+ Node inOrderSuccesor = GetInOrderSuccessor(node);
+ node.SwapKeyValuePair(inOrderSuccesor.Key, inOrderSuccesor.Value);
+ Delete(inOrderSuccesor);
+ }
+ else if (node.parent.leftChild == node)
+ {
+ // Leaf deletion
+ if (node.leftChild == null & node.rightChild == null)
+ {
+ node.parent.leftChild = null;
+ return;
+ }
+ // One child deletion
+ if (node.leftChild == null & node.rightChild != null)
+ {
+ node.parent.leftChild = node.rightChild;
+ node.rightChild.parent = node.parent;
+ return;
+ }
+ if (node.leftChild != null & node.rightChild == null)
+ {
+ node.parent.leftChild = node.leftChild;
+ node.leftChild.parent = node.parent;
+ return;
+ }
+ // If the node has two children, swap values with the succesor, then delete the successor
+ Node inOrderSuccesor = GetInOrderSuccessor(node);
+ node.SwapKeyValuePair(inOrderSuccesor.Key, inOrderSuccesor.Value);
+ Delete(inOrderSuccesor);
+
+ }
+ else if (node.parent.rightChild == node)
+ {
+ if (node.leftChild == null & node.rightChild == null)
+ {
+ node.parent.rightChild = null;
+ return;
+ }
+ if (node.leftChild == null & node.rightChild != null)
+ {
+ node.parent.rightChild = node.rightChild;
+ node.rightChild.parent = node.parent;
+ return;
+ }
+ if (node.leftChild != null & node.rightChild == null)
+ {
+ node.parent.rightChild = node.leftChild;
+ node.leftChild.parent = node.parent;
+ return;
+ }
+ Node inOrderSuccesor = GetInOrderSuccessor(node);
+ node.SwapKeyValuePair(inOrderSuccesor.Key, inOrderSuccesor.Value);
+ Delete(inOrderSuccesor);
+ }
+ }
+
+ internal static Node GetInOrderSuccessor(Node predeccesor)
+ {
+ if (predeccesor.rightChild != null)
+ {
+ return GetMinNode(predeccesor.rightChild);
+ }
+
+ Node parent = predeccesor.parent;
+ while (parent != null && predeccesor != parent.leftChild)
+ {
+ predeccesor = parent;
+ parent = parent.parent;
+ }
+
+ return parent;
+ }
+
+ internal static Node GetMinNode(Node parent)
+ {
+ if (parent == null) return null;
+
+ Node current = parent;
+ while (current.leftChild != null)
+ {
+ current = current.leftChild;
+ }
+
+ return current;
+ }
+
+ private Node FindNearest(double searchValue)
+ {
+ Node current = Root;
+ Node closestNode = Root;
+ double minDif = Math.Abs(searchValue - current.Key);
+ double currentDif = 0;
+ while (current != null)
+ {
+ currentDif = Math.Abs(searchValue - current.Key);
+ if (currentDif < minDif)
+ {
+ closestNode = current;
+ minDif = currentDif;
+ }
+
+ if (current.Key > searchValue)
+ {
+ current = current.leftChild;
+ }
+ else if (current.Key < searchValue)
+ {
+ current = current.rightChild;
+ }
+ else return current;
+ }
+
+ return closestNode;
+ }
+
+ public IEnumerator GetEnumerator()
+ {
+ return new TreeEnumerator(_root);
+ }
+ }
+
+ public class Node
+ {
+ // Theoretically the key could be any IComparable, but then FindNearest wouldn't be possible
+ private double _key;
+ public double Key => _key;
+ private double _value;
+ public double Value => _value;
+
+ internal Node parent;
+ internal Node leftChild;
+ internal Node rightChild;
+
+ public Node(double key, double value)
+ {
+ _key = key;
+ _value = value;
+ }
+
+ internal void SwapKeyValuePair(double newKey, double newValue)
+ {
+ _key = newKey;
+ _value = newValue;
+ }
+
+ public override string ToString()
+ {
+ return Key + "," + Value;
+ }
+ }
+
+ public class TreeEnumerator : IEnumerator
+ {
+ private Node _currentNode = null;
+ private Node _nextNode = null;
+ private Node _rootNode;
+
+ public TreeEnumerator(Node root)
+ {
+ _rootNode = root;
+ _nextNode = _rootNode != null ? SpectrumTree.GetMinNode(_rootNode) : null;
+ }
+
+ public bool MoveNext()
+ {
+ if (_nextNode != null)
+ {
+ _currentNode = _nextNode;
+ _nextNode = SpectrumTree.GetInOrderSuccessor(_currentNode);
+ return true;
+ }
+
+ return false;
+ }
+
+ public void Reset()
+ {
+ _currentNode = null;
+ _nextNode = SpectrumTree.GetMinNode(_rootNode);
+ }
+
+ object IEnumerator.Current => _currentNode;
+ public Node Current => _currentNode;
+
+ }
+}
diff --git a/mzLib/Test/TestMzLibUtil.cs b/mzLib/Test/TestMzLibUtil.cs
index d0ee538d6..8f075d532 100644
--- a/mzLib/Test/TestMzLibUtil.cs
+++ b/mzLib/Test/TestMzLibUtil.cs
@@ -5,6 +5,9 @@
using System.Text;
using System.Threading.Tasks;
using MzLibUtil;
+using MassSpectrometry;
+using System.Globalization;
+using System.IO;
namespace Test
{
@@ -33,5 +36,186 @@ public static void TestPeriodTolerantFilenameWithoutExtension(string filenameAnd
string result = PeriodTolerantFilenameWithoutExtension.GetPeriodTolerantFilenameWithoutExtension(filenameAndOrPath);
Assert.AreEqual(expectedResult, result);
}
+
+ public MzSpectrum ActualSpectrum;
+
+ [OneTimeSetUp]
+ public void SetUp()
+ {
+ // Test with actual data
+ //txt file, not mgf, because it's an MS1. Most intense proteoform has mass of ~14037.9 Da
+ string Ms1SpectrumPath = Path.Combine(TestContext.CurrentContext.TestDirectory, @"DataFiles\14kDaProteoformMzIntensityMs1.txt");
+
+ string[] spectrumLines = File.ReadAllLines(Ms1SpectrumPath);
+
+ int mzIntensityPairsCount = spectrumLines.Length;
+ double[] ms1mzs = new double[mzIntensityPairsCount];
+ double[] ms1intensities = new double[mzIntensityPairsCount];
+
+ for (int i = 0; i < mzIntensityPairsCount; i++)
+ {
+ string[] pair = spectrumLines[i].Split('\t');
+ ms1mzs[i] = Convert.ToDouble(pair[0], CultureInfo.InvariantCulture);
+ ms1intensities[i] = Convert.ToDouble(pair[1], CultureInfo.InvariantCulture);
+ }
+
+ ActualSpectrum = new MzSpectrum(ms1mzs, ms1intensities, false);
+ }
+
+ [Test]
+ public void TestSpectrumTreeInsertion()
+ {
+ MzSpectrum unorderedSpectrum = new(new double[] { 5, 6, 1, 2, 3, 4, 7, 8 }, new double[] { 10, 8, 2, 4, 6, 8, 6, 4 }, false);
+ MzSpectrum orderedSpectrum = new(new double[] { 1, 2, 3, 4, 5, 6, 7, 8 }, new double[] { 2, 4, 6, 8, 10, 8, 6, 4 }, false);
+ SpectrumTree testTree = new();
+ for (int i = 0; i < 8; i++)
+ {
+ testTree.Insert(new Node(unorderedSpectrum.XArray[i], unorderedSpectrum.YArray[i]));
+ }
+
+ double[] mz = new double[8];
+ double[] intensity = new double[8];
+ int j = 0;
+ foreach (Node node in testTree)
+ {
+ mz[j] = node.Key;
+ intensity[j] = node.Value;
+ j++;
+ }
+
+ MzSpectrum reconstructedSpectrum = new(mz, intensity, false);
+ Assert.That(orderedSpectrum.Equals(reconstructedSpectrum));
+ }
+
+ [Test]
+ public void TestSpectrumTreeDeletion()
+ {
+ MzSpectrum unorderedSpectrum = new(new double[] { 5, 6, 1, 2, 3, 4, 7, 8 }, new double[] { 10, 8, 2, 4, 6, 8, 6, 4 }, false);
+ SpectrumTree testTree = new();
+ List nodeList = new List();
+ for (int i = 0; i < 8; i++)
+ {
+ nodeList.Add(new Node(unorderedSpectrum.XArray[i], unorderedSpectrum.YArray[i]));
+ testTree.Insert(nodeList[i]);
+ }
+
+ testTree.Delete(nodeList[3]);
+ testTree.Delete(nodeList[5]);
+ double[] mz = new double[6];
+ double[] intensity = new double[6];
+ int j = 0;
+ foreach (Node node in testTree)
+ {
+ mz[j] = node.Key;
+ intensity[j] = node.Value;
+ j++;
+ }
+
+ MzSpectrum orderedSpectrum = new(new double[] { 1, 3, 5, 6, 7, 8 }, new double[] { 2, 6, 10, 8, 6, 4 }, false);
+ MzSpectrum reconstructedSpectrum = new(mz, intensity, false);
+ Assert.That(orderedSpectrum.Equals(reconstructedSpectrum));
+
+ foreach (Node node in testTree)
+ {
+ testTree.Delete(node);
+ }
+ Assert.That(testTree.Root == null);
+
+ // Testing with actual data. Deleting the middle 1000 entries from the tree, then
+ // checking for fidelity
+ PpmTolerance tolerance = new PpmTolerance(5);
+ testTree = new SpectrumTree(ActualSpectrum.XArray, ActualSpectrum.YArray);
+ for (int i = 1500; i < 2500; i++)
+ {
+ // we should be able to find every peak present in the spectrum
+ if (!testTree.PopClosestPeak(ActualSpectrum.XArray[i], tolerance,
+ out var peakMz, out var peakIntensity))
+ {
+ Assert.That(false);
+ }
+ }
+
+ double[] deletionMz = new double[3090];
+ double[] deletionIntensity = new double[3090];
+ double[] treeMz = new double[3090];
+ double[] treeIntensity = new double[3090];
+ for (int i = 0; i < 1500; ++i)
+ {
+ deletionMz[i] = ActualSpectrum.XArray[i];
+ deletionIntensity[i] = ActualSpectrum.YArray[i];
+ }
+ for (int i = 2500; i < 4090; i++)
+ {
+ deletionMz[i-1000] = ActualSpectrum.XArray[i];
+ deletionIntensity[i-1000] = ActualSpectrum.YArray[i];
+ }
+
+ j = 0;
+ foreach (Node node in testTree)
+ {
+ treeMz[j] = node.Key;
+ treeIntensity[j] = node.Value;
+ j++;
+ }
+
+ MzSpectrum deletionSpectrum = new MzSpectrum(deletionMz, deletionIntensity, true);
+ MzSpectrum treeSpectrum = new MzSpectrum(treeMz, treeIntensity, true);
+ Assert.That(treeSpectrum.Equals(deletionSpectrum));
+
+ }
+
+ [Test]
+ public void TestSpectrumTreeBuilder()
+ {
+ MzSpectrum orderedSpectrum = new(new double[] { 1, 2, 3, 4, 5, 6, 7, 8 }, new double[] { 2, 4, 6, 8, 10, 8, 6, 4 }, false);
+ SpectrumTree testTree = new();
+ testTree.BuildTreeFromSpectrum(orderedSpectrum.XArray, orderedSpectrum.YArray);
+
+ double[] mz = new double[8];
+ double[] intensity = new double[8];
+ int j = 0;
+ Node lastNode = null;
+ foreach (Node node in testTree)
+ {
+ mz[j] = node.Key;
+ intensity[j] = node.Value;
+ j++;
+ lastNode = node;
+ }
+
+ MzSpectrum reconstructedSpectrum = new(mz, intensity, false);
+ Assert.That(orderedSpectrum.Equals(reconstructedSpectrum));
+ Assert.That(lastNode.ToString().Equals("8,4"));
+
+
+ testTree = new();
+ testTree.BuildTreeFromSpectrum(ActualSpectrum.XArray, ActualSpectrum.YArray);
+
+ mz = new double[ActualSpectrum.Size];
+ intensity = new double[ActualSpectrum.Size];
+ j = 0;
+ foreach (Node node in testTree)
+ {
+ mz[j] = node.Key;
+ intensity[j] = node.Value;
+ j++;
+ }
+
+ reconstructedSpectrum = new(mz, intensity, false);
+ Assert.That(ActualSpectrum.Equals(reconstructedSpectrum));
+ }
+
+ [Test]
+ public void TestSpectrumTreeFindNearestPeak()
+ {
+ SpectrumTree testTree = new SpectrumTree(ActualSpectrum.XArray, ActualSpectrum.YArray);
+ PpmTolerance tol = new PpmTolerance(10); // 10 ppm is equal to 0.01 Th at 1000 Th
+ testTree.PopClosestPeak(1000.5, tol,out var mz, out var intensity);
+ Assert.That(tol.Within(1000.5, mz));
+ Assert.That(Math.Abs(1491094 - intensity) < 0.01);
+ // The peak should have been removed by the pop operation, so attempting to call it again should fail
+ Assert.That(!testTree.PopClosestPeak(1000.5, tol, out mz, out intensity));
+
+ }
}
}