diff --git a/src/TopDownProteomics/ProForma/TopPicProformaParser.cs b/src/TopDownProteomics/ProForma/TopPicProformaParser.cs
new file mode 100644
index 0000000..9877e34
--- /dev/null
+++ b/src/TopDownProteomics/ProForma/TopPicProformaParser.cs
@@ -0,0 +1,216 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text.RegularExpressions;
+
+namespace TopDownProteomics.ProForma;
+
+/// <summary>
+/// A parser for TopPIC strings into a <see cref="ProFormaTerm"/>.
+/// </summary>
+public class TopPicProformaParser
+{
+    // Mod name -> descriptors read from a TopPIC mod file; stays null when no mod file is supplied.
+    private readonly IDictionary<string, IList<ProFormaDescriptor>>? _modLookup = null;
+
+    #region Regex strings
+    // A parenthesized run of residue letters followed by one or more bracketed modifications.
+    private static readonly Regex _modRx = new(@"\(([A-Z]{1,})\)(\[.+?\])+");
+    // A signed decimal number; the point is escaped so that names such as "Mod123" are not read as masses.
+    private static readonly Regex _numberRx = new(@"(-?\+?[0-9]+\.[0-9]+)");
+    // A '.' with a non-numeric character on at least one side, i.e. never a decimal point between two digits.
+    private static readonly Regex _terminalAaRx = new(@"\P{N}(\.)\P{N}??|\P{N}??(\.)\P{N}");
+    // Bracketed modifications and parentheses: everything that is removed to obtain the bare sequence.
+    private static readonly Regex _strippedSequenceRx = new(@"\[.+?\]|[()]");
+    #endregion
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="TopPicProformaParser"/> class without a mod file.
+    /// </summary>
+    public TopPicProformaParser() { }
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="TopPicProformaParser"/> class.
+    /// </summary>
+    /// <param name="modFile">The mod.txt file for mapping modifications.</param>
+    public TopPicProformaParser(string modFile)
+    {
+        _modLookup = ParseModFile(new FileInfo(modFile).OpenRead());
+    }
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="TopPicProformaParser"/> class.
+    /// </summary>
+    /// <param name="modStream">The mod stream.</param>
+    public TopPicProformaParser(Stream modStream)
+    {
+        _modLookup = ParseModFile(modStream);
+    }
+
+    /// <summary>
+    /// Parses a TopPIC sequence string into a ProForma term.
+    /// </summary>
+    /// <param name="sequence">The sequence.</param>
+    /// <returns>A <see cref="ProFormaTerm"/> built from the stripped sequence, the tags and the terminal descriptors.</returns>
+    public ProFormaTerm ParseTopPicString(string sequence)
+    {
+        // First remove the flanking terminal residues, if present; everything below works on the trimmed string.
+        sequence = RemoveTerminalAAs(sequence);
+        var (nTerms, cTerms, tags) = FindPTMs(sequence);
+        return new ProFormaTerm(GetFullyStrippedSequence(sequence), tags, nTerms, cTerms);
+    }
+
+    // Reads a TopPIC mod file (one "Name,Mass,Residues,Position,UnimodID" entry per line; blank lines and lines
+    // starting with '#' are skipped) into a name -> descriptors lookup; malformed lines raise TopPicParserException.
+    private IDictionary<string, IList<ProFormaDescriptor>> ParseModFile(Stream modStream)
+    {
+        // The mod file is machine-readable data, so numbers are parsed and formatted culture-independently.
+        var invariant = System.Globalization.CultureInfo.InvariantCulture;
+        IDictionary<string, IList<ProFormaDescriptor>> modLookup = new Dictionary<string, IList<ProFormaDescriptor>>();
+        using StreamReader reader = new StreamReader(modStream);
+
+        string? line;
+        while ((line = reader.ReadLine()) != null)
+        {
+            if (string.IsNullOrWhiteSpace(line) || line.StartsWith("#", StringComparison.Ordinal))
+                continue;
+
+            var splitLine = line.Split(',');
+            if (splitLine.Length != 5)
+                throw new TopPicParserException("Failed to parse mod file");
+            var name = splitLine[0];
+            if (!int.TryParse(splitLine[4], System.Globalization.NumberStyles.Integer, invariant, out var uniModNumber))
+                throw new TopPicParserException($"Failed to parse UniMod Id {splitLine[4]}".Trim());
+
+            if (uniModNumber > 0) // a positive id is emitted as a UNIMOD identifier
+                modLookup.Add(name, new List<ProFormaDescriptor>()
+                {
+                    new ProFormaDescriptor(ProFormaKey.Identifier, ProFormaEvidenceType.Unimod, $"UNIMOD:{uniModNumber}"),
+                    new ProFormaDescriptor(ProFormaKey.Info, name)
+                });
+            else if (uniModNumber == -1 && double.TryParse(splitLine[1], System.Globalization.NumberStyles.Float, invariant, out var mass))
+                modLookup.Add(name, new List<ProFormaDescriptor>() // an id of -1 falls back to the mass column
+                {
+                    new ProFormaDescriptor(ProFormaKey.Mass, mass.ToString("+#.000000;-#.000000", invariant)),
+                    new ProFormaDescriptor(ProFormaKey.Info, name)
+                });
+            else
+                throw new TopPicParserException($"invalid UniMod Id or mass");
+        }
+        return modLookup;
+    }
+
+    // Removes every bracketed modification and all parentheses, leaving the bare sequence.
+    private string GetFullyStrippedSequence(string sequence) => _strippedSequenceRx.Replace(sequence, "");
+
+    // Maps the string position of every upper-case letter outside [...] to its zero-based residue index.
+    private Dictionary<int, int> GetIndexLookup(string sequence)
+    {
+        var indexLookup = new Dictionary<int, int>();
+
+        bool inBracket = false;
+        int index = 0;
+        for (int i = 0; i < sequence.Length; i++)
+        {
+            char c = sequence[i];
+            if (c == '[')
+                inBracket = true;
+            else if (c == ']')
+                inBracket = false;
+            else if (char.IsUpper(c) && !inBracket)
+                indexLookup[i] = index++;
+        }
+        return indexLookup;
+    }
+
+    // Finds every "(residues)[mod]" group and files it as N-terminal descriptors (a single first residue),
+    // C-terminal descriptors (the last residue) or a positional / ranged tag.
+    private Tuple<IList<ProFormaDescriptor>, IList<ProFormaDescriptor>, IList<ProFormaTag>> FindPTMs(string sequence)
+    {
+        var indexLookup = GetIndexLookup(sequence);
+        // Residue indices are handed out sequentially from 0, so the last one is Count - 1.
+        int lastIndex = indexLookup.Count - 1;
+
+        List<ProFormaDescriptor> nTerms = new List<ProFormaDescriptor>();
+        List<ProFormaDescriptor> cTerms = new List<ProFormaDescriptor>();
+        List<ProFormaTag> tags = new List<ProFormaTag>();
+
+        foreach (Match match in _modRx.Matches(sequence))
+        {
+            var startIndex = indexLookup[match.Groups[1].Index];
+            int residueCount = match.Groups[1].Length;
+            var ptms = match.Groups[2].Captures;
+
+            if (ptms.Count > 1)
+                throw new TopPicParserException("multiple mods are not currently supported");
+
+            if (startIndex == 0 && residueCount == 1) // a range that includes the start is ambiguous -> keep it a tag
+                nTerms = ParsePtms(ptms);
+            else if (startIndex == lastIndex)
+                cTerms = ParsePtms(ptms);
+            else if (residueCount > 1)
+                tags.Add(new ProFormaTag(startIndex, startIndex + residueCount - 1, ParsePtms(ptms)));
+            else
+                tags.Add(new ProFormaTag(startIndex, ParsePtms(ptms)));
+        }
+        return new Tuple<IList<ProFormaDescriptor>, IList<ProFormaDescriptor>, IList<ProFormaTag>>(nTerms, cTerms, tags);
+    }
+
+    // Flattens the descriptors of every captured "[...]" modification into a single list.
+    private List<ProFormaDescriptor> ParsePtms(CaptureCollection ptms)
+    {
+        var proformaList = new List<ProFormaDescriptor>();
+        foreach (Capture ptm in ptms)
+            proformaList.AddRange(ParsePtmString(ptm.Value));
+
+        return proformaList;
+    }
+
+    // Converts one "[...]" capture: a decimal number becomes a mass, a known name its mod-file entry, else the raw name.
+    private IList<ProFormaDescriptor> ParsePtmString(string ptmString)
+    {
+        var invariant = System.Globalization.CultureInfo.InvariantCulture;
+        ptmString = ptmString.Substring(1, ptmString.Length - 2); // strip the enclosing []
+        var numberMatch = _numberRx.Match(ptmString);
+        // The regex only accepts '.' as the decimal separator, so parse and format culture-independently.
+        if (numberMatch.Success && double.TryParse(numberMatch.Value, System.Globalization.NumberStyles.Float, invariant, out double val))
+            return new List<ProFormaDescriptor>() { new ProFormaDescriptor(ProFormaKey.Mass, val.ToString("+#.0000;-#.0000;0", invariant)) };
+
+        // Names containing '*' (e.g. "Phospho*4") are rejected as unsupported multiple mods.
+        if (ptmString.Contains('*'))
+            throw new TopPicParserException("multiple mods are not currently supported");
+
+        if (_modLookup != null && _modLookup.TryGetValue(ptmString, out var descriptors))
+            return descriptors;
+        return new List<ProFormaDescriptor>() { new ProFormaDescriptor(ptmString) };
+    }
+
+    // Trims the flanking residues written around a TopPIC sequence ("X.SEQ.X", either X may be missing) by cutting
+    // at the first two terminal separator dots; a string without any separator is returned unchanged.
+    private string RemoveTerminalAAs(string sequence)
+    {
+        var matches = _terminalAaRx.Matches(sequence);
+        if (matches.Count == 0)
+            return sequence;
+        if (matches.Count < 2)
+            throw new TopPicParserException("Failed to find both terminal separators in the sequence");
+        // The dot is captured by group 1 or group 2 depending on which alternative of the regex matched.
+        static int DotIndex(Match m) => m.Groups[1].Success ? m.Groups[1].Index : m.Groups[2].Index;
+        var startIndex = DotIndex(matches[0]) + 1;
+        return sequence.Substring(startIndex, DotIndex(matches[1]) - startIndex);
+    }
+}
+
+/// <summary>
+/// An exception for the TopPIC to ProForma parser.
+/// </summary>
+/// <seealso cref="System.Exception" />
+public class TopPicParserException : Exception
+{
+ /// <summary>
+ /// Initializes a new instance of the <see cref="TopPicParserException"/> class.
+ /// </summary>
+ /// <param name="message">The message that describes the error.</param>
+ public TopPicParserException(string message) : base(message) { }
+}
\ No newline at end of file
diff --git a/tests/TopDownProteomics.Tests/ProForma/ToPicParserTests.cs b/tests/TopDownProteomics.Tests/ProForma/ToPicParserTests.cs
new file mode 100644
index 0000000..787c312
--- /dev/null
+++ b/tests/TopDownProteomics.Tests/ProForma/ToPicParserTests.cs
@@ -0,0 +1,100 @@
+using NUnit.Framework;
+using System.IO;
+using System.Text;
+using TopDownProteomics.ProForma;
+
+namespace TopDownProteomics.Tests.ProForma;
+
+/// <summary>
+/// Tests for the <see cref="TopPicProformaParser"/>.
+/// </summary>
+[TestFixture]
+public class ToPicParserTests
+{
+    private static string GetTestDataFile(string name) => Path.Combine(TestContext.CurrentContext.TestDirectory, "TestData", name);
+
+    /// <summary>
+    /// Parses TopPIC strings with the test mod file loaded and compares the written ProForma string.
+    /// </summary>
+    [Test]
+    [TestCase("M.A(AAA)[Phospho]AAA.C", "A(AAA)[UNIMOD:21|Info:Phospho]AAA")]
+    [TestCase("W.(G)[Oxidation]DGCAQKNKPGVYTK(V)[Phospho]YNYVKWIKNTIAANS.", "[UNIMOD:35|Info:Oxidation]-GDGCAQKNKPGVYTKV[UNIMOD:21|Info:Phospho]YNYVKWIKNTIAANS")]
+    [TestCase(".GDGCAQKNKPGVYTK(V)[Phospho]YNYVKWIKNTIAANS.", "GDGCAQKNKPGVYTKV[UNIMOD:21|Info:Phospho]YNYVKWIKNTIAANS")]
+    [TestCase("W.GDGCAQKNKPGVYTKVYNYVKWIKNTIAAN(S)[Phospho].", "GDGCAQKNKPGVYTKVYNYVKWIKNTIAANS-[UNIMOD:21|Info:Phospho]")]
+    [TestCase("W.GDGCAQKNKPGVYTKVYNYVKWIKNTIAANS.", "GDGCAQKNKPGVYTKVYNYVKWIKNTIAANS")]
+    [TestCase(".(G)[Test1]DGCAQKNKPGVYTKVYNYVKWIKNTIAANS.", "[+59.000000|Info:Test1]-GDGCAQKNKPGVYTKVYNYVKWIKNTIAANS")]
+    [TestCase("W.(G)[T@s!1]DGCAQKNKPGVYTKVYNYVKWIKNTIAANS.", "[T@s!1]-GDGCAQKNKPGVYTKVYNYVKWIKNTIAANS")]
+    [TestCase("W.(G)[Test_2]DGCAQKNKPGVYTKVYNYVKWIKNTIAANS.", "[+59.000000|Info:Test_2]-GDGCAQKNKPGVYTKVYNYVKWIKNTIAANS")]
+    [TestCase(".(G)[Ox_plus1]DGCAQKNKPGVYTKVYNYVKWIKNTIAANS.", "[+17.123000|Info:Ox_plus1]-GDGCAQKNKPGVYTKVYNYVKWIKNTIAANS")]
+    [TestCase(".(G)[+23.9987]DGCAQKNKPGVYTKVYNYVKWIKNTIAANS.", "[+23.9987]-GDGCAQKNKPGVYTKVYNYVKWIKNTIAANS")]
+    [TestCase(".(G)[23.9987]DGCAQKNKPGVYTKVYNYVKWIKNTIAANS.", "[+23.9987]-GDGCAQKNKPGVYTKVYNYVKWIKNTIAANS")]
+    [TestCase(".(G)[-23.9987]DGCAQKNKPGVYTKVYNYVKWIKNTIAANS.", "[-23.9987]-GDGCAQKNKPGVYTKVYNYVKWIKNTIAANS")]
+    public void CompareToProForma(string topPIC, string proForma)
+    {
+        var topicParser = new TopPicProformaParser(GetTestDataFile("topPicTestMods.txt"));
+        var term = topicParser.ParseTopPicString(topPIC);
+
+        var writer = new ProFormaWriter();
+
+        Assert.AreEqual(proForma, writer.WriteString(term));
+    }
+
+    /// <summary>
+    /// Parses TopPIC strings with no mod file, so modification names are passed through unchanged.
+    /// </summary>
+    [Test]
+    [TestCase("M.A(AAA)[Phospho]AAA.C", "A(AAA)[Phospho]AAA")]
+    [TestCase("W.(G)[Oxidation]DGCAQKNKPGVYTK(V)[Phospho]YNYVKWIKNTIAANS.", "[Oxidation]-GDGCAQKNKPGVYTKV[Phospho]YNYVKWIKNTIAANS")]
+    [TestCase("W.(G)[asdf4fdfsd6!]DGCAQKNKPGVYTKYNYVKWIKNTIAANS.", "[asdf4fdfsd6!]-GDGCAQKNKPGVYTKYNYVKWIKNTIAANS")]
+    public void CompareToProFormaNoModFile(string topPIC, string proForma)
+    {
+        var topicParser = new TopPicProformaParser();
+        var term = topicParser.ParseTopPicString(topPIC);
+
+        var writer = new ProFormaWriter();
+
+        Assert.AreEqual(proForma, writer.WriteString(term));
+    }
+
+    /// <summary>
+    /// Verifies that unsupported TopPIC strings raise a <see cref="TopPicParserException"/> with the expected message.
+    /// </summary>
+    /// <param name="topPIC">The TopPIC string to parse.</param>
+    /// <param name="exMessage">The expected exception message.</param>
+    [Test]
+    [TestCase("M.A(AAA)[Phospho*4]AAA.C", "multiple mods are not currently supported")]
+    [TestCase("M.A(AAA)[Phospho][Phospho]AAA.C", "multiple mods are not currently supported")]
+    public void ParsingExceptionTesting(string topPIC, string exMessage)
+    {
+        var topicParser = new TopPicProformaParser(GetTestDataFile("topPicTestMods.txt"));
+        TestDelegate throwTest = () =>
+        {
+            topicParser.ParseTopPicString(topPIC);
+        };
+
+        TopPicParserException ex = Assert.Throws<TopPicParserException>(throwTest);
+        Assert.AreEqual(exMessage, ex.Message);
+    }
+
+    /// <summary>
+    /// Verifies that malformed mod file lines raise a <see cref="TopPicParserException"/> with the expected message.
+    /// </summary>
+    /// <param name="modFileString">The mod file content, a single entry line.</param>
+    /// <param name="exMessage">The expected exception message.</param>
+    [Test]
+    [TestCase(@"Phospho,79.966331,STY,any,21,54", "Failed to parse mod file")]
+    [TestCase(@"Phospho,79.966331,STY,any,2O", "Failed to parse UniMod Id 2O")]
+    [TestCase(@"Phospho,79b.966331,STY,any,-1", "invalid UniMod Id or mass")]
+    [TestCase(@"Phospho,79.966331,STY,any,-5", "invalid UniMod Id or mass")]
+    public void ModFilePArsingExceptionTesting(string modFileString, string exMessage)
+    {
+        MemoryStream stream = new MemoryStream(Encoding.UTF8.GetBytes(modFileString));
+        TestDelegate throwTest = () =>
+        {
+            _ = new TopPicProformaParser(stream);
+        };
+
+        TopPicParserException ex = Assert.Throws<TopPicParserException>(throwTest);
+        Assert.AreEqual(exMessage, ex.Message);
+    }
+}
\ No newline at end of file
diff --git a/tests/TopDownProteomics.Tests/TestData/topPicTestMods.txt b/tests/TopDownProteomics.Tests/TestData/topPicTestMods.txt
new file mode 100644
index 0000000..9ad1ed8
--- /dev/null
+++ b/tests/TopDownProteomics.Tests/TestData/topPicTestMods.txt
@@ -0,0 +1,38 @@
+# This file is used to specify modifications
+# # for comments
+# To input a modification, use the following format:
+#
+# Name,Mass,Residues,Position,UnimodID
+#
+# Name: name of the modification (Unimod PSI-MS name)
+# - The Unimod PSI-MS names are recommended
+# - E.g. Phospho, Acetyl
+# - Visit http://www.unimod.org to get PSI-MS names.
+#
+# Mass: monoisotopic mass of modification.
+# - It is important to specify accurate masses (integer masses are insufficient).
+# - E.g. 15.994915
+#
+# Residues: amino acids that can be modified
+# - Use * if this modification is applicable to all the 20 amino acids.
+#
+# Position: positions in the protein where the modification can be attached.
+# - Only "any" is supported, meaning the modification can occur anywhere
+#
+# UnimodID: Unimod id of the modification
+# - Please use -1, if not in unimod
+
+# Methionine oxidation
+Oxidation,15.994915,M,any,35
+
+# Phosphorylation
+Phospho,79.966331,STY,any,21
+
+# test1
+Test1,59.0000,STY,any,-1
+
+# test2
+Test_2,59.0000,STY,any,-1
+
+# test3
+Ox_plus1,17.1230,STY,any,-1
\ No newline at end of file