Skip to content

Commit

Permalink
perf: faster nwk parser, better annotation writing, neatened code
Browse files Browse the repository at this point in the history
  • Loading branch information
LeoFeatherstone committed Mar 14, 2024
1 parent 9050b31 commit 15ecf21
Show file tree
Hide file tree
Showing 6 changed files with 385 additions and 385 deletions.
10 changes: 5 additions & 5 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"editor.formatOnSave": true,
"[typescript]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
}
}
"editor.formatOnSave": true,
"[typescript]": {
"editor.defaultFormatter": "vscode.typescript-language-features"
}
}
240 changes: 120 additions & 120 deletions src/io/readers/newick.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@ import { Tree, Node } from '../../';
import { SkipTreeException } from '../../utils/error';

/*
Parse a string in the New Hampshire format and return a pointer to the tree.
This is a slight modification of code written by Heng Li for jstreeview, at
https://github.com/lh3/jstreeview/blob/main/knhx.js
Modifications are for compatibility with our tree object, and to avoid assigning
';' as the root label.
The function works by reading a .nwk string left to right. Where an open bracket is encountered,
we venture deeper into the tree, which is reflected in pushing -1 to the stack array.
*/

/**
Expand All @@ -21,41 +21,41 @@ import { SkipTreeException } from '../../utils/error';
*/
export function readNewick(str: string): Tree { // formerly kn_parse
    // Single left-to-right pass over the Newick string.
    //  - `nodes` collects every node in the order it is created.
    //  - `stack` holds indices into `nodes` that still await a parent;
    //    a -1 entry marks a '(' whose matching ')' has not been seen yet.
    // The Tree is built from the last node created. If `str` yields no node at
    // all, `nodes[nodes.length - 1]` is undefined and is passed to Tree as-is.
    const stack: number[] = [];
    const nodes: Node[] = [];

    let pos = 0;
    while (pos < str.length) {
        // Skip every character outside printable ASCII '!'..'~' (blanks, newlines, ...).
        while (pos < str.length && (str.charAt(pos) < '!' || str.charAt(pos) > '~')) ++pos;
        if (pos === str.length) break;

        const c = str.charAt(pos);
        if (c === ',') {
            ++pos;
        } else if (c === '(') {
            // Going one level deeper: remember where this clade's children start.
            stack.push(-1);
            ++pos;
        } else if (c === ')') {
            // The node about to be created is the parent of everything stacked
            // above the most recent '(' marker.
            const parentIndex = nodes.length;
            let open = stack.length - 1;
            while (open >= 0 && stack[open] >= 0) --open;
            if (open < 0) {
                break; // TODO: Add error (')' without a matching '(')
            }
            const childCount = stack.length - 1 - open;
            // Reads the label / annotation / branch length that follows ')'.
            pos = kn_add_node(str, pos + 1, nodes, childCount);
            const parent = nodes[parentIndex];
            for (let k = childCount - 1; k >= 0; --k) {
                const child = nodes[stack[open + 1 + k]];
                parent.children[k] = child;
                child.parent = parent;
            }
            // Drop the marker and the children, leaving the new parent in their place.
            stack.length = open;
            stack.push(parentIndex);
        } else {
            // Any other printable character starts a leaf.
            stack.push(nodes.length);
            pos = kn_add_node(str, pos, nodes, 0); // moves pos to the next ',' or ')' (or end of string)
        }
    }
    //if (stack.length > 1) tree.error |= 2; // TODO: Add error message
    return new Tree(nodes[nodes.length - 1]);
}

/**
Expand All @@ -64,25 +64,25 @@ export function readNewick(str: string) { // formerly kn_parse
* @returns {Tree[]} Tree
*/
export function readTreesFromNewick(newick: string): Tree[] {
    // Trees are separated by a ';' followed by optional whitespace and a newline.
    // A ';' that is not followed by a newline stays attached to its tree string
    // and is passed through to readNewick.
    const trees: Tree[] = [];

    for (const chunk of newick.split(/;\s*\n/)) {
        const treeString = chunk.trim();
        if (treeString.length === 0) continue; // blank segment, nothing to parse

        try {
            trees.push(readNewick(treeString));
        } catch (e) {
            // Only SkipTreeException means "ignore this tree"; anything else is
            // rethrown to the caller.
            if (!(e instanceof SkipTreeException)) {
                throw e;
            }
            console.log('Skipping Newick tree: ' + e.message);
        }
    }

    return trees;
}

/**
Expand All @@ -98,49 +98,49 @@ export function readTreesFromNewick(newick: string): Tree[] {
* @returns {number}
*/
function kn_add_node(str: string, l: number, nodes: Node[], x: number): number {
    // Reads one node description starting at index `l` and stopping at the next
    // ',' or ')' (or the end of the string), creates a Node from it, appends the
    // node to `nodes` and returns the index at which scanning stopped.
    // A description is: optional label, optional '[...]' annotation, optional
    // ':<branch length>'.
    const node = new Node(x); // TODO: Unsure if x is right index
    const beg = l;
    // Index where the label ends, i.e. the first '[' or ':' seen. -1 means
    // "not found yet"; using 0 for that would misfire when the node starts at
    // string index 0 with '[' or ':'.
    let end = -1;

    let i = l;
    for (; i < str.length && str.charAt(i) !== ',' && str.charAt(i) !== ')'; ++i) {
        const c = str.charAt(i);
        if (c === '[') { // Annotation block
            const metaBeg = i;
            if (end < 0) end = i;
            do ++i; while (i < str.length && str.charAt(i) !== ']');
            if (i === str.length) {
                //tree.error |= 4; // <-- TODO: add unfinished annotation error
                break;
            }
            node.annotation = parseNewickAnnotations(str.slice(metaBeg + 1, i));
        } else if (c === ':') { // Parse branch length
            if (end < 0) end = i;
            const numBeg = ++i;
            for (; i < str.length; ++i) {
                const cc = str.charAt(i);
                if ((cc < '0' || cc > '9') && cc !== 'e' && cc !== 'E' && cc !== '+' && cc !== '-' && cc !== '.')
                    break;
            }
            node.branchLength = parseFloat(str.slice(numBeg, i));
            --i; // the loop's ++i must land on the character that ended the number
        }
        // NOTE: an earlier version had a third branch guarded by
        // `c < '!' && c > '~'`, which can never be true. It is left out so that
        // whitespace inside a label is still kept as part of the label.
    }
    if (end < 0) end = i; // no '[' or ':' found: the label runs to where scanning stopped

    if (end > beg) {
        const label = str.slice(beg, end)
            .replace(/;$/g, "")
            .replace(/^"|"$/g, "") // remove quotes
            .replace(/^'|'$/g, ""); // remove quotes

        if (label.includes('#')) { // Hybrid case
            const parsedLabel = parseHybridLabels(label);
            node.label = parsedLabel['label'];
            node.hybridID = parsedLabel['hybridID'];
        } else {
            node.label = label.length > 0 ? label : undefined;
        }
    }

    nodes.push(node);
    return i;
}

/**
Expand All @@ -154,21 +154,21 @@ function kn_add_node(str: string, l: number, nodes: Node[], x: number) {
* @returns {any}
*/
export function parseHybridLabels(label: string) {
    // Splits a hybrid-node label of the form "<name>#<type><index>" into its name
    // and integer index. The type tags 'H', 'LGT' and 'R' are stripped from the
    // part after the first '#'. Returns { label, hybridID }, where `label` is
    // undefined when nothing precedes the '#'.
    // Throws a string (not an Error) when there is no '#', or when what remains
    // after stripping the type tags is empty or not an integer.
    if (!label.includes('#')) throw 'No hash(#), in hybrid label.'

    // Only the first two '#'-separated parts are used; any further parts are ignored.
    const [name = '', rawId = ''] = label.split('#');

    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- return shape kept as `any` for existing callers
    const parsed: any = {};
    parsed['label'] = name.length > 0 ? name : undefined;

    const digits = rawId.replace(/H|LGT|R/g, ""); // remove hybridisation types
    const hybridID = Number(digits);
    // Number('') is 0, so a missing index has to be rejected explicitly.
    if (digits.trim().length === 0 || !Number.isInteger(hybridID)) { // hybridID must be integer
        throw 'Hybrid index not an integer!'
    }
    parsed['hybridID'] = hybridID;

    return parsed;
}

/**
Expand All @@ -190,9 +190,9 @@ export function parseNewickAnnotations(annotations: string) {
annotations = annotations.slice(1);
}

const annotation_object: any = {};
const annotation_object: any = {};

const pairs = annotations.split(/[,:](?![^{]*\})/g); // Split on all ',' and ':' not in braces '{}'
const pairs = annotations.split(/[,:](?![^{]*\})/g); // Split on all ',' and ':' not in braces '{}'

pairs.forEach(pair => {
const keyValue: string[] = pair.split('=');
Expand All @@ -202,12 +202,12 @@ export function parseNewickAnnotations(annotations: string) {
// Handling array-like values enclosed in {}
if (value.includes('{') && value.includes('}')) {

annotation_object[key] = value
.replace(/{|}/g, '')
.split(/,|:/g);
annotation_object[key] = value
.replace(/{|}/g, '')
.split(/,|:/g);

} else {
annotation_object[key] = value
annotation_object[key] = value
}

});
Expand Down
Loading

0 comments on commit 15ecf21

Please sign in to comment.