diff --git a/docs/Changelog.html b/docs/Changelog.html
index a03e3f7..93a8140 100644
--- a/docs/Changelog.html
+++ b/docs/Changelog.html
@@ -13,6 +13,13 @@
MS-GF+ ChangeLog
MS-GF+ Documentation home
+
+ v2023.01.12
+
+
+ - Add parameter and output messages for working with particularly dense centroided data (read from mzML or mzXML)
+
+
v2022.04.18
diff --git a/docs/MSGFPlus.html b/docs/MSGFPlus.html
index 3bfe871..c526971 100644
--- a/docs/MSGFPlus.html
+++ b/docs/MSGFPlus.html
@@ -90,6 +90,10 @@ MS-GF+
[-maxMissedCleavages Count] (Exclude peptides with more than this number of missed cleavages from the search; Default: -1 (no limit))
[-numMods Count] (Maximum number of dynamic (variable) modifications per peptide; Default: 3)
+
+[-allowDenseCentroidedPeaks 0/1] (Default: 0 (disabled); 1: (for mzML/mzXML input only) allows inclusion of spectra with high-density centroid data in the search)
+ MS-GF+ checks the distance between consecutive peaks in each spectrum; if the median distance is less than 50 ppm, the spectrum is treated as a profile spectrum regardless of the centroided/profile value provided in the mzML or mzXML file.
+ This parameter allows overriding this check when the mzML/mzXML file says the spectrum is centroided.
diff --git a/src/main/java/edu/ucsd/msjava/msdbsearch/SearchParams.java b/src/main/java/edu/ucsd/msjava/msdbsearch/SearchParams.java
index 01787e2..55982a0 100644
--- a/src/main/java/edu/ucsd/msjava/msdbsearch/SearchParams.java
+++ b/src/main/java/edu/ucsd/msjava/msdbsearch/SearchParams.java
@@ -50,6 +50,7 @@ public class SearchParams {
private double chargeCarrierMass;
private int maxMissedCleavages;
private int maxNumMods;
+ private boolean allowDenseCentroidedPeaks;
public SearchParams() {
}
@@ -214,6 +215,10 @@ public int getMaxMissedCleavages() {
return maxMissedCleavages;
}
+ // Used by MS-GF+
+ public boolean getAllowDenseCentroidedPeaks() {
+ return allowDenseCentroidedPeaks;
+ }
/**
* Look for # in dataLine
@@ -402,6 +407,8 @@ public String parse(ParamManager paramManager) {
} else if (maxMissedCleavages > -1 && enzyme.getName().equals("NoCleavage")) {
return "Cannot specify a MaxMissedCleavages when using no cleavage enzyme";
}
+
+ allowDenseCentroidedPeaks = paramManager.getAllowDenseCentroidedPeaks() == 1;
maxNumMods = paramManager.getMaxNumModsPerPeptide();
int maxNumModsCompare = aaSet.getMaxNumberOfVariableModificationsPerPeptide();
diff --git a/src/main/java/edu/ucsd/msjava/msutil/SpecKey.java b/src/main/java/edu/ucsd/msjava/msutil/SpecKey.java
index 4862882..796b3af 100644
--- a/src/main/java/edu/ucsd/msjava/msutil/SpecKey.java
+++ b/src/main/java/edu/ucsd/msjava/msutil/SpecKey.java
@@ -67,7 +67,8 @@ public static ArrayList getSpecKeyList(
int minCharge,
int maxCharge,
ActivationMethod activationMethod,
- int minNumPeaksPerSpectrum) {
+ int minNumPeaksPerSpectrum,
+ boolean allowDenseCentroidedData) {
Iterator itr = specAcc.getSpecItr();
@@ -78,7 +79,8 @@ public static ArrayList getSpecKeyList(
minCharge,
maxCharge,
activationMethod,
- minNumPeaksPerSpectrum);
+ minNumPeaksPerSpectrum,
+ allowDenseCentroidedData);
SpectrumParser parser = specAcc.getSpectrumParser();
@@ -101,7 +103,8 @@ public static ArrayList getSpecKeyList(
int minCharge,
int maxCharge,
ActivationMethod activationMethod,
- int minNumPeaksPerSpectrum) {
+ int minNumPeaksPerSpectrum,
+ boolean allowDenseCentroidedData) {
if (activationMethod == ActivationMethod.FUSION)
return getFusedSpecKeyList(itr, startSpecIndex, endSpecIndex, minCharge, maxCharge);
@@ -109,6 +112,7 @@ public static ArrayList getSpecKeyList(
ArrayList specKeyList = new ArrayList();
int numProfileSpectra = 0;
+ int numDenseCentroidedSpectra = 0;
int numSpectraWithTooFewPeaks = 0;
final int MAX_INFORMATIVE_MESSAGES = 10;
int informativeMessageCount = 0;
@@ -176,9 +180,18 @@ public static ArrayList getSpecKeyList(
}
}
- if (!spec.isCentroided()) {
+ if (!spec.isCentroided() && !(spec.isCentroidedWithDensePeaks() && allowDenseCentroidedData)) {
+ String message = "Skip spectrum " + spec.getID() + " since ";
+ if (spec.isCentroidedWithDensePeaks()) {
+ message += "peaks are too dense";
+ numDenseCentroidedSpectra++;
+ } else {
+ message += "it is not centroided";
+ numProfileSpectra++;
+ }
+
if (informativeMessageCount < MAX_INFORMATIVE_MESSAGES) {
- System.out.println("Skip spectrum " + spec.getID() + " since it is not centroided");
+ System.out.println(message);
informativeMessageCount++;
} else {
if (informativeMessageCount == MAX_INFORMATIVE_MESSAGES) {
@@ -186,7 +199,6 @@ public static ArrayList getSpecKeyList(
informativeMessageCount++;
}
}
- numProfileSpectra++;
continue;
}
@@ -206,6 +218,10 @@ public static ArrayList getSpecKeyList(
System.out.println("Ignoring " + numProfileSpectra + " profile spectra.");
System.out.println("Ignoring " + numSpectraWithTooFewPeaks + " spectra having less than " + minNumPeaksPerSpectrum + " peaks.");
+ if (numDenseCentroidedSpectra > 0) {
+ System.out.println("Ignoring " + numDenseCentroidedSpectra + " spectra marked as centroid with dense peaks (<50ppm median distance).\n" +
+ " Re-run search with parameter '-allowDenseCentroidedPeaks 1' to include these spectra in the search");
+ }
return specKeyList;
}
diff --git a/src/main/java/edu/ucsd/msjava/msutil/Spectrum.java b/src/main/java/edu/ucsd/msjava/msutil/Spectrum.java
index 59240e8..a18bfc1 100644
--- a/src/main/java/edu/ucsd/msjava/msutil/Spectrum.java
+++ b/src/main/java/edu/ucsd/msjava/msutil/Spectrum.java
@@ -44,6 +44,8 @@ public enum Polarity {
private Polarity scanPolarity = Polarity.POSITIVE;
private Boolean isCentroided = true;
+ private Boolean externalSetIsCentroided = false;
+ private Boolean isCentroidedWithDensePeaks = false;
private boolean isHighPrecision = false;
// private Tolerance precursorTolerance = null;
@@ -256,6 +258,15 @@ public boolean isCentroided() {
return this.isCentroided;
}
+ /**
+ * Whether this spectrum is centroided according to the reader, but failed the peak density check in determineIsCentroided() because peaks are too dense.
+ *
+ * @return false unless the reader called setIsCentroided(true) and the peak density check in determineIsCentroided() failed
+ */
+ public boolean isCentroidedWithDensePeaks() {
+ return this.isCentroidedWithDensePeaks;
+ }
+
/**
* Returns whether this spectrum peaks are measured with high-precision.
*
@@ -437,6 +448,8 @@ public void setScanPolarity(Polarity scanPolarity) {
*/
public void setIsCentroided(boolean isCentroided) {
this.isCentroided = isCentroided;
+ // This setter is used for mzML and mzXML files; record that isCentroided was set from outside this class
+ this.externalSetIsCentroided = true;
}
/**
@@ -489,7 +502,7 @@ public Float getIsolationWindowTargetMz() {
* Sets isCentroided by a simple testing.
*/
public void determineIsCentroided() {
- this.isCentroided = true;
+ boolean centroidedCheckPass = true;
// if(this.size() > 100)
// {
@@ -516,8 +529,21 @@ public void determineIsCentroided() {
prevMz = curMz;
}
Collections.sort(diff);
- if (diff.size() > 0 && diff.get(diff.size() / 2) < 50)
- isCentroided = false;
+ if (diff.size() > 0 && diff.get(diff.size() / 2) < 50) {
+ // Check failed - the median PPM distance between peaks is less than 50 PPM
+ centroidedCheckPass = false;
+ }
+ }
+
+ if (centroidedCheckPass) {
+ this.isCentroided = true;
+ } else {
+ if (this.isCentroided && this.externalSetIsCentroided) {
+ // set a flag to notify the user
+ this.isCentroidedWithDensePeaks = true;
+ }
+
+ this.isCentroided = false;
}
}
diff --git a/src/main/java/edu/ucsd/msjava/params/ParamManager.java b/src/main/java/edu/ucsd/msjava/params/ParamManager.java
index c8974dd..24862be 100644
--- a/src/main/java/edu/ucsd/msjava/params/ParamManager.java
+++ b/src/main/java/edu/ucsd/msjava/params/ParamManager.java
@@ -131,6 +131,9 @@ public enum ParamNameEnum {
ADD_FEATURES("addFeatures", "AddFeatures", "Include additional features in the output (enable this to post-process results with Percolator)",
"0 means Output basic scores only (Default)\n" +
"\t 1 means Output additional features"),
+
+ ALLOW_DENSE_CENTROIDED_PEAKS("allowDenseCentroidedPeaks", "AllowDenseCentroidedPeaks", "Allow centroid scans with dense peaks (Default: 0)\n" +
+ "\t (for mzML or mzXML files, the console output will tell you if you might want to use this)", null),
DD_DIRECTORY("dd", "DBIndexDir", "Path to the directory containing database index files", null),
@@ -652,6 +655,13 @@ private void addMaxNumModsParam() {
addParameter(maxNumMods);
}
+ private void addAllowDenseCentroidedPeaksParam() {
+ EnumParameter allowDenseCentroidedPeaksParam = new EnumParameter(ParamNameEnum.ALLOW_DENSE_CENTROIDED_PEAKS);
+ allowDenseCentroidedPeaksParam.registerEntry("Skip all spectra that fail a peak density check").setDefault();
+ allowDenseCentroidedPeaksParam.registerEntry("Allow mzML/mzXML centroided spectra that fail a peak density check");
+ addParameter(allowDenseCentroidedPeaksParam);
+ }
+
private void addDbIndexDirParam(boolean isHidden) {
FileParameter dbIndexDirParam = new FileParameter(ParamNameEnum.DD_DIRECTORY);
dbIndexDirParam.fileMustExist();
@@ -780,6 +790,8 @@ public void addMSGFPlusParams() {
addChargeCarrierMassParam();
addMaxMissedCleavagesParam();
addMaxNumModsParam();
+
+ addAllowDenseCentroidedPeaksParam();
addExample("Example (high-precision): java -Xmx3500M -jar MSGFPlus.jar -s test.mzML -d IPI_human_3.79.fasta -inst 1 -t 20ppm -ti -1,2 -ntt 2 -tda 1 -o testMSGFPlus.mzid -mod Mods.txt");
addExample("Example (low-precision): java -Xmx3500M -jar MSGFPlus.jar -s test.mzML -d IPI_human_3.79.fasta -inst 0 -t 0.5Da,2.5Da -ntt 2 -tda 1 -o testMSGFPlus.mzid -mod Mods.txt");
@@ -907,6 +919,8 @@ public void addMSGFDBParams() {
uniformAAProb.registerEntry("Use amino acid probabilities computed from the input database").setDefault();
uniformAAProb.registerEntry("Use probability 0.05 for all amino acids");
addParameter(uniformAAProb);
+
+ addAllowDenseCentroidedPeaksParam();
addExample("Example (high-precision): java -Xmx2000M -jar MSGFDB.jar -s test.mzXML -d IPI_human_3.79.fasta -t 30ppm -c13 1 -nnet 0 -tda 1 -o testMSGFDB.tsv");
addExample("Example (low-precision): java -Xmx2000M -jar MSGFDB.jar -s test.mzXML -d IPI_human_3.79.fasta -t 0.5Da,2.5Da -nnet 0 -tda 1 -o testMSGFDB.tsv");
@@ -1175,6 +1189,11 @@ public FileParameter getConfigFileParam() {
return ((FileParameter) getParameter(ParamNameEnum.CONFIGURATION_FILE.key));
}
+ // Used by MS-GF+
+ public int getAllowDenseCentroidedPeaks() {
+ return getIntValue(ParamNameEnum.ALLOW_DENSE_CENTROIDED_PEAKS.key);
+ }
+
public int getIntValue(String key) {
Parameter param = this.getParameter(key);
if (param instanceof IntParameter)
diff --git a/src/main/java/edu/ucsd/msjava/ui/MSGFDB.java b/src/main/java/edu/ucsd/msjava/ui/MSGFDB.java
index 763a771..1fe8646 100644
--- a/src/main/java/edu/ucsd/msjava/ui/MSGFDB.java
+++ b/src/main/java/edu/ucsd/msjava/ui/MSGFDB.java
@@ -208,6 +208,7 @@ private static String runMSGFDB(File specFile, SpecFileFormat specFormat, File o
boolean useUniformAAProb = paramManager.getIntValue(ParamManager.ParamNameEnum.UNIFORM_AA_PROBABILITY.getKey()) == 1;
boolean replicateMergedResults = paramManager.getIntValue("replicate") == 1;
boolean doNotDseEdgeScore = paramManager.getIntValue(ParamManager.ParamNameEnum.EDGE_SCORE.getKey()) == 1;
+ boolean allowDenseCentroidedPeaks = paramManager.getIntValue(ParamManager.ParamNameEnum.ALLOW_DENSE_CENTROIDED_PEAKS.getKey()) == 1;
System.out.println("Loading database files...");
File dbIndexDir = paramManager.getFile(ParamManager.ParamNameEnum.DD_DIRECTORY.getKey());
@@ -277,7 +278,7 @@ private static String runMSGFDB(File specFile, SpecFileFormat specFormat, File o
int avgPeptideMass = 2000;
int numBytesPerMass = 12;
int numSpecScannedTogether = (int) ((float) maxMemory / avgPeptideMass / numBytesPerMass);
- ArrayList specKeyList = SpecKey.getSpecKeyList(specAcc.getSpecItr(), startSpecIndex, endSpecIndex, minCharge, maxCharge, activationMethod, Constants.MIN_NUM_PEAKS_PER_SPECTRUM);
+ ArrayList specKeyList = SpecKey.getSpecKeyList(specAcc.getSpecItr(), startSpecIndex, endSpecIndex, minCharge, maxCharge, activationMethod, Constants.MIN_NUM_PEAKS_PER_SPECTRUM, allowDenseCentroidedPeaks);
int specSize = specKeyList.size();
System.out.print("Reading spectra finished ");
diff --git a/src/main/java/edu/ucsd/msjava/ui/MSGFDBLib.java b/src/main/java/edu/ucsd/msjava/ui/MSGFDBLib.java
index 648886e..b84dcf0 100644
--- a/src/main/java/edu/ucsd/msjava/ui/MSGFDBLib.java
+++ b/src/main/java/edu/ucsd/msjava/ui/MSGFDBLib.java
@@ -104,7 +104,7 @@ public static String runMSGFLib(ParamManager paramManager) {
int avgPeptideMass = 2000;
int numBytesPerMass = 12;
int numSpecScannedTogether = (int) ((float) maxMemory / avgPeptideMass / numBytesPerMass);
- ArrayList specKeyList = SpecKey.getSpecKeyList(specAcc.getSpecItr(), 0, Integer.MAX_VALUE, 0, Integer.MAX_VALUE, activationMethod, Constants.MIN_NUM_PEAKS_PER_SPECTRUM);
+ ArrayList specKeyList = SpecKey.getSpecKeyList(specAcc.getSpecItr(), 0, Integer.MAX_VALUE, 0, Integer.MAX_VALUE, activationMethod, Constants.MIN_NUM_PEAKS_PER_SPECTRUM, false);
int specSize = specKeyList.size();
System.out.print("Reading spectra finished ");
diff --git a/src/main/java/edu/ucsd/msjava/ui/MSGFPlus.java b/src/main/java/edu/ucsd/msjava/ui/MSGFPlus.java
index 5b1fdbb..83d435e 100644
--- a/src/main/java/edu/ucsd/msjava/ui/MSGFPlus.java
+++ b/src/main/java/edu/ucsd/msjava/ui/MSGFPlus.java
@@ -22,8 +22,8 @@
public class MSGFPlus {
- public static final String VERSION = "Release (v2022.04.18)";
- public static final String RELEASE_DATE = "18 April 2022";
+ public static final String VERSION = "Release (v2023.01.12)";
+ public static final String RELEASE_DATE = "12 January 2023";
public static final String DECOY_DB_EXTENSION = ".revCat.fasta";
public static final String DEFAULT_DECOY_PROTEIN_PREFIX = "XXX";
@@ -185,6 +185,7 @@ private static String runMSGFPlus(int ioIndex, SpecFileFormat specFormat, File o
int numThreads = params.getNumThreads();
boolean doNotUseEdgeScore = params.doNotUseEdgeScore();
+ boolean allowDenseCentroidedPeaks = params.getAllowDenseCentroidedPeaks();
int minNumPeaksPerSpectrum = params.getMinNumPeaksPerSpectrum();
if (minNumPeaksPerSpectrum == -1) // not specified
@@ -267,7 +268,7 @@ private static String runMSGFPlus(int ioIndex, SpecFileFormat specFormat, File o
return "Error while parsing spectrum file: " + specFile.getPath();
ArrayList specKeyList = SpecKey.getSpecKeyList(specAcc,
- startSpecIndex, endSpecIndex, minCharge, maxCharge, activationMethod, minNumPeaksPerSpectrum);
+ startSpecIndex, endSpecIndex, minCharge, maxCharge, activationMethod, minNumPeaksPerSpectrum, allowDenseCentroidedPeaks);
int specSize = specKeyList.size();
if (specSize == 0)