diff --git a/doc/wiki b/doc/wiki index 0e573e6285..17836a40ad 160000 --- a/doc/wiki +++ b/doc/wiki @@ -1 +1 @@ -Subproject commit 0e573e6285197cbb2d0a119fc067b2c15e056783 +Subproject commit 17836a40ad78b7d4d997cb711c32873201a2e673 diff --git a/src/primer_filter.cpp b/src/primer_filter.cpp new file mode 100644 index 0000000000..19bdc91273 --- /dev/null +++ b/src/primer_filter.cpp @@ -0,0 +1,538 @@ +#include "primer_filter.hpp" +#include +#include "surjector.hpp" +#include "hts_alignment_emitter.hpp" + +namespace vg { + +using namespace std; + +//#define DEBUG_PRIMER_FILTER + +// Constructor +PrimerFinder::PrimerFinder(const unique_ptr& graph_param, + const SnarlDistanceIndex* distance_index_param, ifstream& primers_file_handle, + const gbwtgraph::GBWTGraph& gbwt_graph_param, const gbwt::GBWT& gbwt_index_param, + const gbwt::FastLocate& r_index_param, MinimizerMapper* giraffe_mapper_param) + : graph(graph_param.get()), + distance_index(distance_index_param), + gbwt_graph(gbwt_graph_param), + gbwt_index(gbwt_index_param), + r_index(r_index_param), + giraffe_mapper(giraffe_mapper_param) { + + load_primers(primers_file_handle); +} + +// Destructor +PrimerFinder::~PrimerFinder() { + // nothing to do +} + +const vector& PrimerFinder::get_primer_pairs_of_chrom(const string& chrom_name) const { + return chroms.at(chrom_name); +} + +// Make a new pair of primers with given attributes. Primers are processed and +// added to primer_pairs and selected_primer_pairs. +void PrimerFinder::add_primer_pair(const string& path_name, + const size_t& left_primer_starting_node_id, const size_t& left_primer_offset, + const size_t& left_primer_length, const size_t& right_primer_starting_node_id, + const size_t& right_primer_offset, const size_t& right_primer_length) { + + chroms.at(path_name).emplace_back(); + PrimerPair& primer_pair = chroms.at(path_name).back(); + primer_pair.chromosome_name = path_name; + primer_pair.template_position = 0; + primer_pair.right_primer.left = false; + + make_primer(primer_pair.left_primer, path_name, left_primer_starting_node_id, + left_primer_offset, left_primer_length, true); + make_primer(primer_pair.right_primer, path_name, right_primer_starting_node_id, + right_primer_offset, right_primer_length, false); + primer_pair.linear_product_size = primer_pair.right_primer.position_template + - primer_pair.left_primer.position_template + primer_pair.right_primer.length; + update_variation(primer_pair, path_name); + update_min_max_product_size(primer_pair); +} + +void PrimerFinder::load_primers(ifstream& file_handle) { + + //ifstream file_handle(path_to_primers); + assert(file_handle.is_open()); + + + // Regular expressions for matching fields with numbers + std::regex left_seq ("^PRIMER_LEFT_[0-9]*_SEQUENCE.*"); + std::regex right_seq ("^PRIMER_RIGHT_[0-9]*_SEQUENCE.*"); + std::regex left_primer_position ("^PRIMER_LEFT_[0-9]*=.*"); + std::regex right_primer_position ("^PRIMER_RIGHT_[0-9]*=.*"); + + vector::iterator curr_primer_iterator; + + string chromosome_name = ""; + string template_feature = ""; + size_t template_position = std::numeric_limits::max(); + bool has_path = false; + + string line; + while (getline(file_handle, line)) { + line = strip(line); + + if (line == "=") { + //End of the record for one primer pair + chromosome_name = ""; + template_feature = ""; + template_position = std::numeric_limits::max(); + has_path = false; + } else if (startswith(line, "SEQUENCE_ID")) { + //Get the path, path offset, and features from the sequence_id of the primer pair + //This will be the same for all primer pairs up to the next "=" + vector cur_fields = move(split(split(line,'=')[1], '|')); + + if (cur_fields.size() == 4) { + //If the sequence id is correctly formatted + chromosome_name = cur_fields[0]; + template_feature = cur_fields[1] + "|" + cur_fields[2]; + template_position = stoi(cur_fields[3]); + has_path = graph->has_path(chromosome_name); + if (!has_path) { + cerr << "warning: primer finder can't find a path named " << chromosome_name << " in the graph" << endl << "\tfalling back on mapping the template sequence" << endl; + } + } else { + template_feature = line; + has_path = false; + cerr << "warning: primer finder " << line << " is not formatted with a path and offset" << endl << "\tfalling back on mapping the template sequence" << endl; + } +#ifdef DEBUG_PRIMER_FILTER + cerr << "FIND PRIMERS FOR INPUT " << line << ": " << chromosome_name << ", " << template_feature << ", " << template_position << endl; +#endif + + } else if (startswith(line, "SEQUENCE_TEMPLATE") && !has_path) { + //If the path from the sequence id isn't in the graph, then get the path and path offset by mapping the sequence + string seq = split(line,'=')[1]; + if (giraffe_mapper == nullptr) { + throw std::runtime_error("error: primer filter doesn't have a minimizer file to map the template"); + } + std::tie(chromosome_name, template_position) = get_graph_coordinates_from_sequence(seq); + + } else if (startswith(line, "PRIMER_PAIR_NUM_RETURNED")) { + //How many primer pairs for this sequence template? + + size_t primer_pair_count = stoi(split(line,'=')[1]); + size_t new_vector_start = chroms[chromosome_name].size(); + + //Add all new primer pairs for this template + chroms.reserve(new_vector_start + primer_pair_count); + for (size_t i = 0 ; i < primer_pair_count ; i++) { + chroms[chromosome_name].emplace_back(); + chroms[chromosome_name].back().chromosome_name = chromosome_name; + chroms[chromosome_name].back().template_position = template_position; + chroms[chromosome_name].back().template_feature = template_feature; + chroms[chromosome_name].back().right_primer.left = false; + } + + //Set the current primer pair iterator to the first new pair + curr_primer_iterator = chroms[chromosome_name].begin() + new_vector_start; + } else if (std::regex_match(line, left_seq)) { + curr_primer_iterator->left_primer.sequence = split(line, '=')[1]; +#ifdef DEBUG_PRIMER_FILTER + cerr << "\tGet left sequence " << line << ": " << curr_primer_iterator->left_primer.sequence << endl; +#endif + } else if (std::regex_match(line, right_seq)) { + curr_primer_iterator->right_primer.sequence = split(line, '=')[1]; +#ifdef DEBUG_PRIMER_FILTER + cerr << "\tGet right sequence " << line << ": " << curr_primer_iterator->left_primer.sequence << endl; +#endif + } else if (std::regex_match(line, left_primer_position)) { + //Start position and length of the left primer + curr_primer_iterator->left_primer.position_template = stoi(split(split(line, '=')[1], ',')[0]); + curr_primer_iterator->left_primer.length = stoi(split(split(line, '=')[1], ',')[1]); + curr_primer_iterator->left_primer.position_chromosome = curr_primer_iterator->left_primer.position_template + template_position; +#ifdef DEBUG_PRIMER_FILTER + cerr << "old template position " << template_position << endl; + cerr << "\tGet left primer position" << line << ": " << curr_primer_iterator->left_primer.position_template << ", " + << curr_primer_iterator->left_primer.length << ", " + << curr_primer_iterator->left_primer.position_chromosome << endl; +#endif + } else if (std::regex_match(line, right_primer_position)) { +#ifdef DEBUG_PRIMER_FILTER + cerr << "\tGet right primer position" << line << ": " << curr_primer_iterator->left_primer.position_chromosome << endl; +#endif + //Start position and length of the right primer + size_t right_primer_offset = stoi(split(split(line, '=')[1], ',')[0]); + curr_primer_iterator->right_primer.length = stoi(split(split(line, '=')[1], ',')[1]); + curr_primer_iterator->right_primer.position_chromosome = right_primer_offset - curr_primer_iterator->right_primer.length + 1 + template_position; + curr_primer_iterator->right_primer.position_template = right_primer_offset - curr_primer_iterator->right_primer.length + 1; + + //This is the last thing for this primer pair, so update the primer pair + map_to_nodes(curr_primer_iterator->left_primer, chromosome_name); + map_to_nodes(curr_primer_iterator->right_primer, chromosome_name); + + curr_primer_iterator->linear_product_size = curr_primer_iterator->right_primer.position_template + - curr_primer_iterator->left_primer.position_template + curr_primer_iterator->right_primer.length; + update_variation(*curr_primer_iterator, chromosome_name); + update_min_max_product_size(*curr_primer_iterator); + + //Iterator to the new primer pair + curr_primer_iterator++; + } + + } +} + +const size_t PrimerFinder::total_reference_paths() const { + return chroms.size(); +} + +vector PrimerFinder::get_reference_paths() { + vector reference_paths; + for (const auto& chrom : chroms) { + reference_paths.push_back(chrom.first); + } + return reference_paths; +} + +void PrimerFinder::make_primer(Primer& primer, const string& path_name, + const size_t& starting_node_id, const size_t& offset, const size_t& length, + const bool& is_left) { + + if (is_left) { + primer.left = true; + } else { + primer.left = false; + } + primer.length = length; + string sequence = ""; + handle_t cur_handle = graph->get_handle(starting_node_id); // get the starting node handle + step_handle_t cur_step_handle = graph->steps_of_handle(cur_handle)[0]; + primer.position_template = graph->get_position_of_step(cur_step_handle) + offset; + primer.position_chromosome = primer.position_template; + // Walk down the path and get the sequence of primer + if (graph->get_length(cur_handle) - offset > length) { + sequence += graph->get_sequence(cur_handle).substr(offset, length); + } else { + sequence += graph->get_sequence(cur_handle).substr(offset, graph->get_length(cur_handle) - offset); + while (sequence.size() < length) { + cur_step_handle = graph->get_next_step(cur_step_handle); + cur_handle = graph->get_handle_of_step(cur_step_handle); + sequence += graph->get_sequence(cur_handle).substr(0, min(graph->get_length(cur_handle), length-sequence.size())); + } + } + + if (is_left) { + primer.sequence = sequence; + } else { + primer.sequence = reverse_complement(sequence); // Take the reverse complement for right primer + } + map_to_nodes(primer, path_name); // Search and store corresponding nodes ids +} + +static string get_haplotype_sequence(gbwt::size_type sequence_visit_offset, handle_t start_handle, + handle_t end_handle, const gbwtgraph::GBWTGraph& gbwt_graph, size_t start_max, size_t end_max) { + + string haplotype; + gbwt::edge_type pos = gbwt::edge_type(gbwtgraph::GBWTGraph::handle_to_node(start_handle), sequence_visit_offset); + + if (pos == gbwt::invalid_edge() || pos.first == gbwt::ENDMARKER) { + return haplotype; + } + + handle_t curr = gbwt_graph.node_to_handle(pos.first); + if (curr == end_handle) { + return haplotype; + } + gbwtgraph::view_type view = gbwt_graph.get_sequence_view(curr); + size_t offset = (view.second > start_max ? view.second - start_max : 0); + haplotype.append(view.first + offset, view.second - offset); + + while (true) { + pos = gbwt_graph.index->LF(pos); + if (pos.first == gbwt::ENDMARKER) { + break; + } + curr = gbwtgraph::GBWTGraph::node_to_handle(pos.first); + view = gbwt_graph.get_sequence_view(curr); + if (curr == end_handle) { + haplotype.append(view.first, std::min(view.second, end_max)); + break; + } else { + haplotype.append(view.first, view.second); + } + } + return haplotype; +} + +std::pair PrimerFinder::get_graph_coordinates_from_sequence(const string& seq) { + string ref_name; + int64_t ref_offset; + bool ref_rev; + + //Make an alignment from the sequence + Alignment aln; + aln.set_sequence(seq); + aln.set_name("primer_template"); + + //Map the alignment + vector mapped = giraffe_mapper->map(aln); + + //If there wasn't an alignment, error + if (mapped.empty() || mapped.front().mapping_quality() < 30) { + cerr << "error: Primer filter could not map template sequence " << seq << endl; + return std::make_pair(ref_name, std::numeric_limits::max()); + } + + + //Get the reference paths we want to align to + //This is done automatically + //TODO: These are empty but they could be command line arguments + string path_file; + vector path_names; + vector> sequence_dictionary = get_sequence_dictionary(path_file, path_names, *graph); + unordered_set reference_paths; + reference_paths.reserve(sequence_dictionary.size()); + for (auto& entry : sequence_dictionary) { + reference_paths.insert(get<0>(entry)); + } + + //Surject the alignment onto the reference paths + Surjector surjector(graph); + surjector.surject(mapped.front(), reference_paths, ref_name, ref_offset, ref_rev); + + //TODO: Double check that this is correct. idk why ref_offset is an int and not a size_t + if (ref_rev) { + ref_offset -= seq.size(); + } + assert (graph->has_path(ref_name)); +#ifdef DEBUG_PRIMER_FILTER + cerr << "\tmapped sequence to " << ref_name << " at offset " << ref_offset << endl; +#endif + + return std::make_pair(ref_name, (size_t)ref_offset); +} + + +void PrimerFinder::update_min_max_product_size(PrimerPair& primer_pair) { + if (primer_pair.chromosome_name.empty()) { + return; + } + const auto& sequence_visits = primer_pair.sequence_visits; + + handle_t start_handle = gbwt_graph.get_handle(primer_pair.left_primer.mapped_nodes_ids.front()); + handle_t end_handle = gbwt_graph.get_handle(primer_pair.right_primer.mapped_nodes_ids.back()); + if (start_handle == end_handle) { + primer_pair.min_product_size = primer_pair.linear_product_size; + primer_pair.max_product_size = primer_pair.linear_product_size; + return; + } + + size_t start_max = gbwt_graph.get_length(start_handle) - primer_pair.left_primer.offset; + size_t end_max = primer_pair.right_primer.offset; + size_t minimum_distance = numeric_limits::max(); + size_t maximum_distance = 0; + for (const auto& visit : sequence_visits) { + string haplotype = get_haplotype_sequence(visit.second, start_handle, end_handle, gbwt_graph, start_max, end_max); + if (haplotype.size() < minimum_distance) { + minimum_distance = haplotype.size(); + } + if (haplotype.size() > maximum_distance) { + maximum_distance = haplotype.size(); + } + } + primer_pair.min_product_size = minimum_distance; + primer_pair.max_product_size = maximum_distance; +} + +void PrimerFinder::map_to_nodes(Primer& primer, const string& path_name) { +#ifdef DEBUG_PRIMER_FILTER + cerr << "Map to nodes for primer " << primer.sequence << endl; +#endif + if (path_name.empty()) { + return; + } + path_handle_t reference_path_handle = graph->get_path_handle(path_name); + string primer_seq; + if (primer.left) { + primer_seq = primer.sequence; + } else { + primer_seq = reverse_complement(primer.sequence); + } + + step_handle_t cur_node_step_handle = graph->get_step_at_position(reference_path_handle, primer.position_chromosome); + handle_t cur_node_handle = graph->get_handle_of_step(cur_node_step_handle); + size_t cur_node_length = graph->get_length(cur_node_handle); + size_t cur_node_position = graph->get_position_of_step(cur_node_step_handle); + size_t cur_offset = primer.position_chromosome - cur_node_position; + primer.mapped_nodes_ids.push_back(graph->get_id(cur_node_handle)); + if (primer.left) { + primer.offset = cur_offset; + } + size_t matched_length = 0; + while (cur_node_length - cur_offset < primer.length - matched_length) { + assert(graph->get_sequence(cur_node_handle).substr(cur_offset, cur_node_length - cur_offset) + == primer_seq.substr(matched_length, cur_node_length - cur_offset)); + matched_length += cur_node_length - cur_offset; + cur_offset = 0; + cur_node_step_handle = graph->get_next_step(cur_node_step_handle); + cur_node_handle = graph->get_handle_of_step(cur_node_step_handle); + cur_node_length = graph->get_length(cur_node_handle); + primer.mapped_nodes_ids.push_back(graph->get_id(cur_node_handle)); + } + assert(graph->get_sequence(cur_node_handle).substr(cur_offset, primer.length - matched_length) + == primer_seq.substr(matched_length, primer.length - matched_length)); + if (!primer.left) { + primer.offset = cur_offset + primer.length - matched_length; + } +} + +size_t PrimerFinder::longest_match_len(Primer& primer, const string& left_seq, + const string& right_seq, const bool& first_node) { + size_t llen = left_seq.size(), rlen = right_seq.size(); + size_t length = min(llen, rlen); + size_t longest_match = 0; + + if (first_node) { + if (llen >= rlen) { + // Check if the first node contains the entire sequence of the priemr + for (size_t i = 0; i <= llen - rlen; i++) { + if (left_seq.substr(i, rlen) == right_seq) { + longest_match = rlen; + primer.offset = (primer.left) ? i : i + primer.sequence.size(); + return longest_match; + } + } + } + for (size_t i = 1; i <= length; i++) { + // Find the length of match between first node sequence's suffix and primer sequnece's prefix + if (left_seq.substr(llen - i, i) == right_seq.substr(0, i)) { + longest_match = i; + primer.offset = (primer.left && first_node) ? llen - i : i; + } + } + } else { + for (size_t i = 1; i <= length; i++) { + // Find the length of match between downstream nodes seqeunces and primer sequence + if (left_seq.substr(0, i) == right_seq.substr(0, i)) { + longest_match = i; + primer.offset = (!primer.left) ? i : primer.offset; + } + } + } + + return longest_match; +} + +const string PrimerFinder::strip(const string& s) const { + const string WHITESPACE = " \n\r\t\f\v"; + size_t end = s.find_last_not_of(WHITESPACE); + size_t start = s.find_first_not_of(WHITESPACE); + if (end == string::npos) { + return ""; + } + return s.substr(start, end+1); +} + +vector get_sequence_visits(const handle_t& handle, + const gbwt::FastLocate& r_index, const gbwtgraph::GBWTGraph& gbwt_graph) { + + vector sa = r_index.decompressSA(gbwt_graph.handle_to_node(handle)); + vector result; + result.reserve(sa.size()); + for (size_t i = 0; i < sa.size(); i++) { + result.push_back({ sa[i], i }); + } + std::sort(result.begin(), result.end(), [&](HaplotypePartitioner::sequence_type a, HaplotypePartitioner::sequence_type b) -> bool { + gbwt::size_type a_id = r_index.seqId(a.first); + gbwt::size_type a_offset = r_index.seqOffset(a.first); + gbwt::size_type b_id = r_index.seqId(b.first); + gbwt::size_type b_offset = r_index.seqOffset(b.first); + return ((a_id < b_id) || ((a_id == b_id) && (a_offset > b_offset))); + }); + return result; +} + +static void sa_to_da(std::vector& sequences, const gbwt::FastLocate& r_index) { + for (auto& sequence : sequences) { + sequence.first = r_index.seqId(sequence.first); + } +} + +void PrimerFinder::update_variation(PrimerPair& primer_pair, const string& path_name) { +#ifdef DEBUG_PRIMER_FILTER + cerr << "Update variation" << endl; +#endif + if (path_name.empty()) { + return; + } + const vector& left_primer_node_ids = primer_pair.left_primer.mapped_nodes_ids; + const vector& right_primer_node_ids = primer_pair.right_primer.mapped_nodes_ids; + vector nodes_id; + merge(left_primer_node_ids.begin(), left_primer_node_ids.end(), + right_primer_node_ids.begin(), right_primer_node_ids.end(), back_inserter(nodes_id)); + handle_t cur_handle = gbwt_graph.get_handle(nodes_id[0]); + auto sequence_visits = get_sequence_visits(cur_handle, r_index, gbwt_graph); + sa_to_da(sequence_visits, r_index); + + for (size_t i = 1; i < nodes_id.size(); i++) { + cur_handle = gbwt_graph.get_handle(nodes_id[i]); + auto cur_sequence_visits = get_sequence_visits(cur_handle, r_index, gbwt_graph); + sa_to_da(cur_sequence_visits, r_index); + unordered_set seq_ids; + for (const auto& seq_visit : cur_sequence_visits) { + seq_ids.insert(seq_visit.first); + } + + sequence_visits.erase( + remove_if( + sequence_visits.begin(), + sequence_visits.end(), + [&seq_ids](const HaplotypePartitioner::sequence_type& visit) { + return seq_ids.find(visit.first) == seq_ids.end(); + } + ), + sequence_visits.end() + ); + } + + auto unique_haplotypes = sequence_visits; + auto it = unique(unique_haplotypes.begin(), unique_haplotypes.end(), [this](const auto& a, const auto& b) { + const gbwt::PathName& path_name_a = this->gbwt_graph.index->metadata.path(gbwt::Path::id(a.first)); + const gbwt::PathName& path_name_b = this->gbwt_graph.index->metadata.path(gbwt::Path::id(b.first)); + return (path_name_a.sample == path_name_b.sample) && (path_name_a.phase == path_name_b.phase); + }); + unique_haplotypes.erase(it, unique_haplotypes.end()); + + primer_pair.sequence_visits = sequence_visits; + primer_pair.variation_level = static_cast(unique_haplotypes.size()) / static_cast(gbwt_graph.index->metadata.haplotypes()); + +} + +vector PrimerFinder::split(const string& str) { + istringstream iss(str); + string field; + vector fields; + + while (iss >> field) { + fields.push_back(field); + } + return fields; +} + +vector PrimerFinder::split(const string& str, const char& delim) { + istringstream iss(str); + string field; + vector fields; + + while (getline(iss, field, delim)) { + fields.push_back(field); + } + + return fields; +} + + +bool PrimerFinder::startswith(const string& str, const string& prefix) { + return str.compare(0, prefix.length(), prefix) == 0; +} + +} diff --git a/src/primer_filter.hpp b/src/primer_filter.hpp new file mode 100644 index 0000000000..42c760922c --- /dev/null +++ b/src/primer_filter.hpp @@ -0,0 +1,207 @@ +// primer_filter.hpp +// +// Contains class Primer_finder for storing and filtering primers predicted +// using Primer3. Also contains Primer struct and Primer_pair struct that stores +// information on primers and primer pairs. + +#ifndef VG_PRIMER_FILTER_HPP_INCLUDED +#define VG_PRIMER_FILTER_HPP_INCLUDED + +// Not sure what to include.. Will include everything from the unittest for now +#include +#include +#include +#include +#include +#include +#include +#include +#include "utility.hpp" +#include "snarl_distance_index.hpp" +#include "minimizer_mapper.hpp" +#include "integrated_snarl_finder.hpp" +#include "genotypekit.hpp" +#include "traversal_finder.hpp" +#include +#include +#include "../primer_filter.hpp" +#include "../recombinator.hpp" + +using namespace std; + +namespace vg { + +/** + * Primer struct contains primer attributes, including sequence, left/right primer, + * position on the reference genome, length, index offset in corresponding node on + * sequence graph, and vector of corresponding nodes on the sequence graph. Everything + * is in the positive/forward orientation. + */ +struct Primer { + string sequence; + bool left = true; + size_t position_chromosome = numeric_limits::max(); + size_t position_template = numeric_limits::max(); + size_t length = numeric_limits::max(); + size_t offset = numeric_limits::max(); + vector mapped_nodes_ids; +}; + +/** + * Primer_pair struct contains primer pair attributesm including left primer, right primer, + * linear product size, minimum and maximum product size on the sequence graph, and boolean on + * whether the primers locate in low variation region of the sequence graph. + */ +struct PrimerPair { + Primer left_primer; + Primer right_primer; + string chromosome_name; + string template_feature; + size_t linear_product_size = numeric_limits::max(); + size_t template_position = numeric_limits::max(); + size_t min_product_size = numeric_limits::max(); + size_t max_product_size = numeric_limits::max(); + double variation_level = 0.0; + vector sequence_visits; +}; + +class PrimerFinder { + +private: + unordered_map> chroms; // map containing a vector of primer pairs for each chromosome + const PathPositionHandleGraph* graph; + const SnarlDistanceIndex* distance_index; + MinimizerMapper* giraffe_mapper; + const gbwtgraph::GBWTGraph& gbwt_graph; + const gbwt::GBWT& gbwt_index; + const gbwt::FastLocate& r_index; + + +public: + PrimerFinder() = default; + + /** + * Construct Primer finder given PathPositionHandleGraph, reference graph name + * and pointer to SnarlDistanceIndex + */ + PrimerFinder(const unique_ptr& graph_param, + const SnarlDistanceIndex* distance_index_param, + ifstream& primers_file_handle, + const gbwtgraph::GBWTGraph& gbwt_graph, const gbwt::GBWT& gbwt_index, + const gbwt::FastLocate& r_index, MinimizerMapper* giraffe_mapper_param=nullptr); + + /** + * Destructor + */ + ~PrimerFinder(); + + /** + * Add a Primer_pair object given primers' starting node id, offset relative + * to the starting node, and length, all in the POSTIVE orientation. The new + * primer_pair object is automatically added to primer_pairs vector - and + * selected_primer_pairs if conditions are met. Mainly used for unit testing. + */ + void add_primer_pair(const string& path_name, const size_t& left_primer_starting_node_id, + const size_t& left_primer_offset, const size_t& left_primer_length, + const size_t& right_primer_starting_node_id, + const size_t& right_primer_offset, const size_t& right_primer_length); + + /** + * Read the path to the primer3 output. Primers information is parsed, + * processed, and stored in primer_pairs vector - and selected_primer_pairs + * if conditions are met. + */ + void load_primers(ifstream& file_handle); + + /** + * return vector of Primer pairs + */ + const vector& get_primer_pairs_of_chrom(const string& chrom_name) const; + + /** + * return the total number of reference paths + */ + const size_t total_reference_paths() const; + + vector get_reference_paths(); + +private: + /** + * Private functions used by public or private functions. + */ + + /** + * Get the graph coordinates by mapping and surjecting the template + * To be used if the chromosome_name isn't a valid path + * Returns a pair of the path/chromosome name and the offset of the template in the path + * Used in: load_primers + */ + std::pair get_graph_coordinates_from_sequence(const string& seq); + + + /** + * Update minimum and maximum prodcut to a primer pair object. + * Used in: add_primer_pair + * load_primers + */ + void update_min_max_product_size(PrimerPair& primer_pair); + + /** + * Update a Primer object given starting node id, offset relative to the starting node, + * and the length of primer. + * Used in: add_primer_pair + */ + void make_primer(Primer& primer, const string& path_name, const size_t& starting_node_id, + const size_t& offset, const size_t& length, const bool& is_left); + + /** + * Find and store corresponding node ids to Primer object. + * Used in: make_primer + * load_primers + */ + void map_to_nodes(Primer& primer, const string& path_name); + + /** + * Find the length of the longest match between two sequences. Also find and + * store offset in Primer object. + * Used in: map_to_nodes + */ + size_t longest_match_len(Primer& primer, const string& left_seq, const string& right_seq, + const bool& first_node); + + /** + * Strip empty spaces on the sides of a string. + * Used in: load_primers + */ + const string strip(const string& s) const; + + /** + * Check if primers in a primer_pair object have variations on the pangenome. + * Used in: add_primer_node + * load_primers + */ + // const bool no_variation_at_primers(const PrimerPair& primer_pair) const; + + void update_variation(PrimerPair& primer_pair, const string& path_name); + + /** + * Split a string into vectors. + * Used in: load_primers + */ + vector split(const string& str); + + /** + * Split a string into vectors given delimiter. + */ + vector split(const string& str, const char& delim); + + /** + * Works like str.startswith(prefix) in python + * Used in: load_primers + */ + bool startswith(const string& str, const string& prefix); +}; + +} + +#endif /* primer_filter_hpp */ diff --git a/src/subcommand/primers_main.cpp b/src/subcommand/primers_main.cpp new file mode 100644 index 0000000000..4b33a952f5 --- /dev/null +++ b/src/subcommand/primers_main.cpp @@ -0,0 +1,264 @@ +#include + +#include +#include + +#include + +#include "../primer_filter.hpp" +#include "../snarl_distance_index.hpp" + +using namespace std; +using namespace vg; +using namespace vg::subcommand; + +void help_primers(char** argv) { + cerr << "usage: " << argv[0] << " primers [options] input.primer3 > filtered_primers.out" << endl + << endl + << "options:" << endl + << " -x, --xg-path FILE use this xg graph (required)" << endl + << " -d, --dist-index FILE use this distance index (required)" << endl + << " -r, --r-index FILE use this r index (required)" << endl + << " -g, --gbz FILE use this gbz file (required)" << endl + << " -M, --minimizers FILE use this minimizer file for mapping the template sequence, if necessary" << endl + << " -Z, --zipcodes FILE use this zipcode file for mapping the template sequence, if necessary" << endl + << " -v, --variation-threshold DOUBLE output primers that work for at least this percentage of haplotypes (default: 0.8)" << endl + << " -l, --tolerance INT allow this much difference between minimum and maximum sizes compared to the linear product size (default: 10)" << endl + << " -n, --minimum-size INT minimum product size allowed (has precedence over --tolerance)" << endl + << " -m, --maximum-size INT maximum product size allowed (has precedence over --tolerance)" << endl + << " -a, --all-primers output all primers" << endl; +} + +size_t difference(const size_t& a, const size_t& b) { + size_t diff; + if (a == b) { + diff = 0; + } else if (a > b) { + diff = a - b; + } else { + diff = b - a; + } + return diff; +} + +void print_tabular(const string& genome_name, const PrimerPair& primer_pair) { + const Primer& left_primer = primer_pair.left_primer; + const Primer& right_primer = primer_pair.right_primer; + size_t rln = right_primer.mapped_nodes_ids.size() - 1; //right primer last node index + cout << genome_name << "\t" + << primer_pair.template_feature << "\t" + << primer_pair.template_position << "\t" + << left_primer.sequence << "\t" + << right_primer.sequence << "\t" + << left_primer.position_template << "\t" + << right_primer.position_template << "\t" + << left_primer.position_chromosome << "\t" + << right_primer.position_chromosome << "\t" + << left_primer.mapped_nodes_ids[0] << "\t" + << right_primer.mapped_nodes_ids[rln] << "\t" + << left_primer.length << "\t" + << right_primer.length << "\t" + << primer_pair.linear_product_size << "\t" + << primer_pair.min_product_size << "\t" + << primer_pair.max_product_size << "\t" + << primer_pair.variation_level << endl; +} + +int main_primers(int argc, char** argv) { + + if (argc == 2) { + help_primers(argv); + return 1; + } + + string xg_path; + string distance_index_path; + string ri_path; + string gbz_path; + string min_path; + string zip_path; + bool zero_variation = false; + bool all_primers = false; + int tolerance = 10; + double variation_threshold = 0.8; + int minimum_product_size = numeric_limits::max(); + int maximum_product_size = numeric_limits::max(); + + int c; + optind = 2; + + while (true) { + static struct option long_options[] = + { + {"help", no_argument, 0, 'h'}, + {"xg-path", required_argument, 0, 'x'}, + {"dist-index", required_argument, 0, 'd'}, + {"ri-path", required_argument, 0, 'r'}, + {"gbz-path", required_argument, 0, 'g'}, + {"minimizers", required_argument, 0, 'M'}, + {"zipcodes", required_argument, 0, 'Z'}, + {"variation-threshold", required_argument, 0, 'v'}, + {"tolerance", required_argument, 0, 'l'}, + {"minimum-size", required_argument, 0, 'n'}, + {"maximum-size", required_argument, 0, 'm'}, + {"all-primers", required_argument, 0, 'a'}, + {0, 0, 0, 0} + }; + + int option_index = 0; + c = getopt_long (argc, argv, "hx:d:r:g:M:Z:v:l:n:m:a", long_options, &option_index); + + // Detect the end of the options. + if (c == -1) break; + + switch (c) + { + case 'x': + xg_path = optarg; + break; + + case 'd': + distance_index_path = optarg; + break; + + case 'r': + ri_path = optarg; + break; + + case 'g': + gbz_path = optarg; + break; + + case 'M': + min_path = optarg; + break; + + case 'Z': + zip_path = optarg; + break; + + case 'v': + variation_threshold = parse(optarg); + break; + + case 'l': + tolerance = parse(optarg); + break; + + case 'n': + minimum_product_size = parse(optarg); + break; + + case 'm': + maximum_product_size = parse(optarg); + break; + + case 'a': + all_primers = true; + break; + + case 'h': + case '?': + help_primers(argv); + exit(1); + break; + + default: + abort (); + } + } + + if (xg_path.empty()) { + cerr << "error:[vg primers] xg file (-x) is required" << endl; + exit(1); + } + + if (distance_index_path.empty()) { + cerr << "error:[vg primers] distance index file (-d) is required" << endl; + exit(1); + } + + if (ri_path.empty()) { + cerr << "error:[vg primers] r index file (-r) is required" << endl; + exit(1); + } + + if (gbz_path.empty()) { + cerr << "error:[vg primers] gbz file (-g) is required" << endl; + exit(1); + } + + string primers_path = get_input_file_name(optind, argc, argv); + + SnarlDistanceIndex distance_index; + unique_ptr graph; + gbwtgraph::GBWTGraph gbwt_graph; + gbwt::GBWT gbwt_index; + gbwt::FastLocate r_index; + load_r_index(r_index, ri_path); + load_gbz(gbwt_index, gbwt_graph, gbz_path); + gbwt_graph.set_gbwt(gbwt_index); + r_index.setGBWT(gbwt_index); + distance_index.deserialize(distance_index_path); + graph = vg::io::VPKG::load_one(xg_path); + ifstream file_handle(primers_path); + MinimizerMapper* giraffe_mapper = nullptr; + unique_ptr minimizer_index; + ZipCodeCollection zipcodes; + if (!min_path.empty()) { + minimizer_index = vg::io::VPKG::load_one(min_path); + if (!zip_path.empty()) { + ifstream zip_in (zip_path); + zipcodes.deserialize(zip_in); + zip_in.close(); + } + } + //The minimizer mapper needs to be declared here to keep it around in memory + //So sometimes make it with empty indexes but only keep the pointer to it if we had minimizers + MinimizerMapper minimizer_mapper (gbwt_graph, *minimizer_index, &distance_index, &zipcodes); + if (!min_path.empty()) { + //Set parameters + //TODO: I'm not actually sure about this because the sequence is long but it should match a path exactly so it should get the whole alignment at gapless extension + minimizer_mapper.align_from_chains = true; + giraffe_mapper = &minimizer_mapper; + } + PrimerFinder primer_finder(graph, &distance_index, file_handle, gbwt_graph, gbwt_index, r_index, giraffe_mapper); + + cout << "chrom\ttplfeat\ttplpos\tlpseq\trpseq\tlppostpl\trppostmp\tlpposchrom\trpposchrom\t" + << "lpnid\trpnid\tlplen\trplen\tlinsize\tminsize\tmaxsize\tvarlevel" << endl; + + vector reference_paths = primer_finder.get_reference_paths(); + for (size_t i = 0; i < reference_paths.size(); ++i) { + string path_name = reference_paths[i]; + const vector& primer_pairs = primer_finder.get_primer_pairs_of_chrom(path_name); + for (size_t j = 0; j < primer_pairs.size(); ++j) { + const PrimerPair& primer_pair = primer_pairs[j]; + if (all_primers) { + print_tabular(path_name, primer_pair); + } else { + if (minimum_product_size != numeric_limits::max() && + primer_pair.min_product_size < minimum_product_size) { + continue; + } + if (maximum_product_size != numeric_limits::max() && + primer_pair.max_product_size > maximum_product_size) { + continue; + } + + if (difference(primer_pair.linear_product_size, primer_pair.min_product_size) > tolerance + || difference(primer_pair.linear_product_size, primer_pair.max_product_size) > tolerance) { + continue; + } + + if (primer_pair.variation_level < variation_threshold) { + continue; + } + print_tabular(path_name, primer_pair); + } + } + } + + return 0; +} + +static Subcommand vg_primers("primers", "filter primers for low variation", main_primers); diff --git a/src/unittest/primer_filter.cpp b/src/unittest/primer_filter.cpp new file mode 100644 index 0000000000..44a7aa2882 --- /dev/null +++ b/src/unittest/primer_filter.cpp @@ -0,0 +1,400 @@ +// +// primers.cpp +// +// Unit tests for primer filter +// + +#include +#include +#include +#include +#include +#include +#include +#include "vg/io/json2pb.h" +#include +#include "catch.hpp" +#include "random_graph.hpp" +#include "randomness.hpp" +#include "../snarl_distance_index.hpp" +#include "../integrated_snarl_finder.hpp" +#include "../genotypekit.hpp" +#include "../traversal_finder.hpp" +#include +#include +#include "xg.hpp" +#include "../primer_filter.hpp" +#include "../recombinator.hpp" + +namespace vg { +namespace unittest { + +using namespace std; + + + TEST_CASE( "filter simple primers", + "[primer_filter]" ) { + + SnarlDistanceIndex distance_index; + unique_ptr graph; + gbwtgraph::GBWTGraph gbwt_graph; + gbwt::GBWT gbwt_index; + gbwt::FastLocate r_index; + string snarl_index_path = "primers/y.dist"; + string xg_graph_path = "primers/y.xg"; + distance_index.deserialize(snarl_index_path); + graph = vg::io::VPKG::load_one(xg_graph_path); + load_r_index(r_index, "primers/y.ri"); + load_gbz(gbwt_index, gbwt_graph, "primers/y.giraffe.gbz"); + gbwt_graph.set_gbwt(gbwt_index); + r_index.setGBWT(gbwt_index); + + SECTION("template_position=0") { + string primers_path = "primers/y.primer3_with_ref_pos.out"; + ifstream file_handle(primers_path); + PrimerFinder primer_finder(graph, &distance_index, file_handle, gbwt_graph, gbwt_index, r_index); + + SECTION("Loads the correct number of chromosomes") { + REQUIRE(primer_finder.total_reference_paths() == 1); + } + + SECTION("Loads the correct number of primer pairs") { + REQUIRE(primer_finder.get_primer_pairs_of_chrom("y").size() == 5); + } + + SECTION("Loads and processes the primers correctly") { + primer_finder.add_primer_pair("y", 9, 14, 20, 22, 0, 20); // made up data, variation both at primers and in product + primer_finder.add_primer_pair("y", 31, 0, 15, 34, 1, 15); // made up data, no variation at primers or in product + + // Correct primer attributes + const vector left_primers_sequences { + "TGCCTGGCATAGAGGAAAGC", "GAGTCGAGGCTCAAGGACAG", "CAGAGTCGAGGCTCAAGGAC", + "GAGGCTCAAGGACAGCTCTC", "TCCAGAAGCTGCTCTTTCCC", "AGCCAGACAAATCTGGGTTC", + "CAACTGGTAGTTACT" + }; + + const vector left_primers_positions { + 362, 620, 618, 625, 819, 181, 388 + }; + + const vector left_primers_lengths { + 20, 20, 20, 20, 20, 20, 15 + }; + + const vector left_primers_nodes_count { + 2, 1, 1, 2, 2, 6, 1 + }; + + const vector right_primers_sequences { + "GCCAGAAGAGCCTCAAGGAG", "AGGAGAGCTGGGAAAAGGGA", "AGGAGAGCTGGGAAAAGGGA", + "AGGAGAGCTGGGAAAAGGGA", "GCCTGGGTAGCTTTGGATGT", "AGATAATTAAACTGAAGTTC", + "GTTGACAATGAAAAG" + }; + + const vector right_primers_positions { + 466, 745, 745, 745, 935, 260, 485 + }; + + const vector right_primers_lengths { + 20, 20, 20, 20, 20, 20, 15 + }; + + const vector right_primers_nodes_count { + 2, 1, 1, 1, 2, 3, 1 + }; + + const vector min_product_sizes { + 124, 142, 144, 137, 136, 99, 112 + }; + + const vector max_product_sizes { + 124, 145, 147, 140, 137, 99, 112 + }; + + const vector linear_product_sizes { + 124, 145, 147, 140, 136, 99, 112 + }; + + const vector variation_level { + 1.0, 1.0, 1.0, 1.0, 1.0, 0.33333, 1.0 + }; + + + const vector& primer_pairs = primer_finder.get_primer_pairs_of_chrom("y"); + + REQUIRE(primer_pairs.size() == left_primers_sequences.size()); + for (size_t i = 0; i < primer_pairs.size(); ++i) { + REQUIRE(left_primers_nodes_count[i] == primer_pairs[i].left_primer.mapped_nodes_ids.size()); + REQUIRE(left_primers_sequences[i] == primer_pairs[i].left_primer.sequence); + REQUIRE(left_primers_positions[i] == primer_pairs[i].left_primer.position_chromosome); + REQUIRE(left_primers_lengths[i] == primer_pairs[i].left_primer.length); + REQUIRE(right_primers_nodes_count[i] == primer_pairs[i].right_primer.mapped_nodes_ids.size()); + REQUIRE(right_primers_sequences[i] == primer_pairs[i].right_primer.sequence); + REQUIRE(right_primers_positions[i] == primer_pairs[i].right_primer.position_chromosome); + REQUIRE(right_primers_lengths[i] == primer_pairs[i].right_primer.length); + REQUIRE(linear_product_sizes[i] == primer_pairs[i].linear_product_size); + REQUIRE(min_product_sizes[i] == primer_pairs[i].min_product_size); + REQUIRE(max_product_sizes[i] == primer_pairs[i].max_product_size); + REQUIRE(abs(variation_level[i] - primer_pairs[i].variation_level) <= 0.0001); + } + + SECTION("Check that primers are assigned with correct nodes") { + vector pair_0_left_primer_nodes {27, 28}; + for (size_t i = 0; i < primer_pairs[0].left_primer.mapped_nodes_ids.size(); i++) { + REQUIRE(primer_pairs[0].left_primer.mapped_nodes_ids[i] == pair_0_left_primer_nodes[i]); + } + + vector pair_0_right_primer_nodes {33, 34}; + for (size_t i = 0; i < primer_pairs[0].right_primer.mapped_nodes_ids.size(); i++) { + REQUIRE(primer_pairs[0].right_primer.mapped_nodes_ids[i] == pair_0_right_primer_nodes[i]); + } + + vector pair_5_left_primer_nodes {9, 11, 12, 14, 15, 17}; + for (size_t i = 0; i < primer_pairs[5].left_primer.mapped_nodes_ids.size(); i++) { + REQUIRE(primer_pairs[5].left_primer.mapped_nodes_ids[i] == pair_5_left_primer_nodes[i]); + } + + vector pair_5_right_primer_nodes {22, 24, 25}; + for (size_t i = 0; i < primer_pairs[5].right_primer.mapped_nodes_ids.size(); i++) { + REQUIRE(primer_pairs[5].right_primer.mapped_nodes_ids[i] == pair_5_right_primer_nodes[i]); + } + } + + } + } + + SECTION("template_position=11") { + string primers_path = "primers/y.primer3_with_ref_pos_11.out"; + ifstream file_handle(primers_path); + PrimerFinder primer_finder(graph, &distance_index, file_handle, gbwt_graph, gbwt_index, r_index); + + SECTION("Loads the correct number of chromosomes") { + REQUIRE(primer_finder.total_reference_paths() == 1); + } + + SECTION("Loads the correct number of primer pairs") { + REQUIRE(primer_finder.get_primer_pairs_of_chrom("y").size() == 5); + } + + SECTION("Loads and processes the primers correctly") { + primer_finder.add_primer_pair("y", 9, 14, 20, 22, 0, 20); // made up data, variation both at primers and in product + primer_finder.add_primer_pair("y", 31, 0, 15, 34, 1, 15); // made up data, no variation at primers or in product + + // Correct primer attributes + const vector left_primers_sequences { + "TGCCTGGCATAGAGGAAAGC", "GAGTCGAGGCTCAAGGACAG", "CAGAGTCGAGGCTCAAGGAC", + "GAGGCTCAAGGACAGCTCTC", "TCCAGAAGCTGCTCTTTCCC", "AGCCAGACAAATCTGGGTTC", + "CAACTGGTAGTTACT" + }; + + const vector left_primers_positions { + 362, 620, 618, 625, 819, 181, 388 + }; + + const vector left_primers_lengths { + 20, 20, 20, 20, 20, 20, 15 + }; + + const vector left_primers_nodes_count { + 2, 1, 1, 2, 2, 6, 1 + }; + + const vector right_primers_sequences { + "GCCAGAAGAGCCTCAAGGAG", "AGGAGAGCTGGGAAAAGGGA", "AGGAGAGCTGGGAAAAGGGA", + "AGGAGAGCTGGGAAAAGGGA", "GCCTGGGTAGCTTTGGATGT", "AGATAATTAAACTGAAGTTC", + "GTTGACAATGAAAAG" + }; + + const vector right_primers_positions { + 466, 745, 745, 745, 935, 260, 485 + }; + + const vector right_primers_lengths { + 20, 20, 20, 20, 20, 20, 15 + }; + + const vector right_primers_nodes_count { + 2, 1, 1, 1, 2, 3, 1 + }; + + const vector min_product_sizes { + 124, 142, 144, 137, 136, 99, 112 + }; + + const vector max_product_sizes { + 124, 145, 147, 140, 137, 99, 112 + }; + + const vector linear_product_sizes { + 124, 145, 147, 140, 136, 99, 112 + }; + + const vector variation_level { + 1.0, 1.0, 1.0, 1.0, 1.0, 0.33333, 1.0 + }; + + + const vector& primer_pairs = primer_finder.get_primer_pairs_of_chrom("y"); + + REQUIRE(primer_pairs.size() == left_primers_sequences.size()); + for (size_t i = 0; i < primer_pairs.size(); ++i) { + REQUIRE(left_primers_nodes_count[i] == primer_pairs[i].left_primer.mapped_nodes_ids.size()); + REQUIRE(left_primers_sequences[i] == primer_pairs[i].left_primer.sequence); + REQUIRE(left_primers_positions[i] == primer_pairs[i].left_primer.position_chromosome); + REQUIRE(left_primers_lengths[i] == primer_pairs[i].left_primer.length); + REQUIRE(right_primers_nodes_count[i] == primer_pairs[i].right_primer.mapped_nodes_ids.size()); + REQUIRE(right_primers_sequences[i] == primer_pairs[i].right_primer.sequence); + REQUIRE(right_primers_positions[i] == primer_pairs[i].right_primer.position_chromosome); + REQUIRE(right_primers_lengths[i] == primer_pairs[i].right_primer.length); + REQUIRE(linear_product_sizes[i] == primer_pairs[i].linear_product_size); + REQUIRE(min_product_sizes[i] == primer_pairs[i].min_product_size); + REQUIRE(max_product_sizes[i] == primer_pairs[i].max_product_size); + REQUIRE(abs(variation_level[i] - primer_pairs[i].variation_level) <= 0.0001); + } + + SECTION("Check that primers are assigned with correct nodes") { + vector pair_0_left_primer_nodes {27, 28}; + for (size_t i = 0; i < primer_pairs[0].left_primer.mapped_nodes_ids.size(); i++) { + REQUIRE(primer_pairs[0].left_primer.mapped_nodes_ids[i] == pair_0_left_primer_nodes[i]); + } + + vector pair_0_right_primer_nodes {33, 34}; + for (size_t i = 0; i < primer_pairs[0].right_primer.mapped_nodes_ids.size(); i++) { + REQUIRE(primer_pairs[0].right_primer.mapped_nodes_ids[i] == pair_0_right_primer_nodes[i]); + } + + vector pair_5_left_primer_nodes {9, 11, 12, 14, 15, 17}; + for (size_t i = 0; i < primer_pairs[5].left_primer.mapped_nodes_ids.size(); i++) { + REQUIRE(primer_pairs[5].left_primer.mapped_nodes_ids[i] == pair_5_left_primer_nodes[i]); + } + + vector pair_5_right_primer_nodes {22, 24, 25}; + for (size_t i = 0; i < primer_pairs[5].right_primer.mapped_nodes_ids.size(); i++) { + REQUIRE(primer_pairs[5].right_primer.mapped_nodes_ids[i] == pair_5_right_primer_nodes[i]); + } + } + + } + } + SECTION("template_position=11, no path name") { + string primers_path = "primers/y.primer3_with_ref_pos_11.nopath.out"; + ifstream file_handle(primers_path); + unique_ptr minimizer_index = vg::io::VPKG::load_one("primers/y.min"); + ZipCodeCollection oversized_zipcodes; + ifstream zip_in ("primers/y.zipcodes"); + oversized_zipcodes.deserialize(zip_in); + zip_in.close(); + MinimizerMapper giraffe_mapper(gbwt_graph, *minimizer_index, &distance_index, &oversized_zipcodes); + PrimerFinder primer_finder(graph, &distance_index, file_handle, gbwt_graph, gbwt_index, r_index, &giraffe_mapper); + + SECTION("Loads the correct number of chromosomes") { + REQUIRE(primer_finder.total_reference_paths() == 1); + } + + SECTION("Loads the correct number of primer pairs") { + REQUIRE(primer_finder.get_primer_pairs_of_chrom("y").size() == 5); + } + + SECTION("Loads and processes the primers correctly") { + primer_finder.add_primer_pair("y", 9, 14, 20, 22, 0, 20); // made up data, variation both at primers and in product + primer_finder.add_primer_pair("y", 31, 0, 15, 34, 1, 15); // made up data, no variation at primers or in product + + // Correct primer attributes + const vector left_primers_sequences { + "TGCCTGGCATAGAGGAAAGC", "GAGTCGAGGCTCAAGGACAG", "CAGAGTCGAGGCTCAAGGAC", + "GAGGCTCAAGGACAGCTCTC", "TCCAGAAGCTGCTCTTTCCC", "AGCCAGACAAATCTGGGTTC", + "CAACTGGTAGTTACT" + }; + + const vector left_primers_positions { + 362, 620, 618, 625, 819, 181, 388 + }; + + const vector left_primers_lengths { + 20, 20, 20, 20, 20, 20, 15 + }; + + const vector left_primers_nodes_count { + 2, 1, 1, 2, 2, 6, 1 + }; + + const vector right_primers_sequences { + "GCCAGAAGAGCCTCAAGGAG", "AGGAGAGCTGGGAAAAGGGA", "AGGAGAGCTGGGAAAAGGGA", + "AGGAGAGCTGGGAAAAGGGA", "GCCTGGGTAGCTTTGGATGT", "AGATAATTAAACTGAAGTTC", + "GTTGACAATGAAAAG" + }; + + const vector right_primers_positions { + 466, 745, 745, 745, 935, 260, 485 + }; + + const vector right_primers_lengths { + 20, 20, 20, 20, 20, 20, 15 + }; + + const vector right_primers_nodes_count { + 2, 1, 1, 1, 2, 3, 1 + }; + + const vector min_product_sizes { + 124, 142, 144, 137, 136, 99, 112 + }; + + const vector max_product_sizes { + 124, 145, 147, 140, 137, 99, 112 + }; + + const vector linear_product_sizes { + 124, 145, 147, 140, 136, 99, 112 + }; + + const vector variation_level { + 1.0, 1.0, 1.0, 1.0, 1.0, 0.33333, 1.0 + }; + + + const vector& primer_pairs = primer_finder.get_primer_pairs_of_chrom("y"); + + REQUIRE(primer_pairs.size() == left_primers_sequences.size()); + for (size_t i = 0; i < primer_pairs.size(); ++i) { + REQUIRE(left_primers_nodes_count[i] == primer_pairs[i].left_primer.mapped_nodes_ids.size()); + REQUIRE(left_primers_sequences[i] == primer_pairs[i].left_primer.sequence); + REQUIRE(left_primers_positions[i] == primer_pairs[i].left_primer.position_chromosome); + REQUIRE(left_primers_lengths[i] == primer_pairs[i].left_primer.length); + REQUIRE(right_primers_nodes_count[i] == primer_pairs[i].right_primer.mapped_nodes_ids.size()); + REQUIRE(right_primers_sequences[i] == primer_pairs[i].right_primer.sequence); + REQUIRE(right_primers_positions[i] == primer_pairs[i].right_primer.position_chromosome); + REQUIRE(right_primers_lengths[i] == primer_pairs[i].right_primer.length); + REQUIRE(linear_product_sizes[i] == primer_pairs[i].linear_product_size); + REQUIRE(min_product_sizes[i] == primer_pairs[i].min_product_size); + REQUIRE(max_product_sizes[i] == primer_pairs[i].max_product_size); + REQUIRE(abs(variation_level[i] - primer_pairs[i].variation_level) <= 0.0001); + } + + SECTION("Check that primers are assigned with correct nodes") { + vector pair_0_left_primer_nodes {27, 28}; + for (size_t i = 0; i < primer_pairs[0].left_primer.mapped_nodes_ids.size(); i++) { + REQUIRE(primer_pairs[0].left_primer.mapped_nodes_ids[i] == pair_0_left_primer_nodes[i]); + } + + vector pair_0_right_primer_nodes {33, 34}; + for (size_t i = 0; i < primer_pairs[0].right_primer.mapped_nodes_ids.size(); i++) { + REQUIRE(primer_pairs[0].right_primer.mapped_nodes_ids[i] == pair_0_right_primer_nodes[i]); + } + + vector pair_5_left_primer_nodes {9, 11, 12, 14, 15, 17}; + for (size_t i = 0; i < primer_pairs[5].left_primer.mapped_nodes_ids.size(); i++) { + REQUIRE(primer_pairs[5].left_primer.mapped_nodes_ids[i] == pair_5_left_primer_nodes[i]); + } + + vector pair_5_right_primer_nodes {22, 24, 25}; + for (size_t i = 0; i < primer_pairs[5].right_primer.mapped_nodes_ids.size(); i++) { + REQUIRE(primer_pairs[5].right_primer.mapped_nodes_ids[i] == pair_5_right_primer_nodes[i]); + } + } + + } + } + } +} +} diff --git a/test/primers/index.dist b/test/primers/index.dist new file mode 100644 index 0000000000..0deabd8490 Binary files /dev/null and b/test/primers/index.dist differ diff --git a/test/primers/y.dist b/test/primers/y.dist new file mode 100644 index 0000000000..0113e77172 Binary files /dev/null and b/test/primers/y.dist differ diff --git a/test/primers/y.gbwt b/test/primers/y.gbwt new file mode 100644 index 0000000000..2fb458fa8f Binary files /dev/null and b/test/primers/y.gbwt differ diff --git a/test/primers/y.gg b/test/primers/y.gg new file mode 100644 index 0000000000..d7330a802c Binary files /dev/null and b/test/primers/y.gg differ diff --git a/test/primers/y.giraffe.gbz b/test/primers/y.giraffe.gbz new file mode 100644 index 0000000000..c85a5bacce Binary files /dev/null and b/test/primers/y.giraffe.gbz differ diff --git a/test/primers/y.min b/test/primers/y.min new file mode 100644 index 0000000000..3ef7b55c9d Binary files /dev/null and b/test/primers/y.min differ diff --git a/test/primers/y.primer3.split.config b/test/primers/y.primer3.split.config new file mode 100644 index 0000000000..e34445c97b --- /dev/null +++ b/test/primers/y.primer3.split.config @@ -0,0 +1,24 @@ +SEQUENCE_ID=y,100 +SEQUENCE_TEMPLATE=TTTCTTTGATTTATTTGAAGTGACGTTTGACAATCTATCACTAGGGGTAATGTGGGGAAATGGAAAGAATACAAGATTTGGAGCCAGACAAATCTGGGTTCAAATCCTCACTTTGCCACATATTAGCCATGTGACTTTGAACAAGTTAGTTAATCTCTCTGAACTTCAGTTTAATTATCTCTAATATGGAGATGATACTACTGACAGCAGAGGTTTGCTGTGAAGATTAAATTAGGTGATGCTTGTAAAGCTCAGGGAATAGTGCCTGGCATAGAGGAAAGCCTCTGACAACTGGTAGTTACTGTTATTTACTATGAATCCTCACCTTCCTTGACTTCTTGAAACATTTGGCTATTGACCTCTTTCCTCCTTGAGGCTCTTCTGGCTTTTCATTGTCAACA +PRIMER_TASK=generic +PRIMER_PICK_LEFT_PRIMER=1 +PRIMER_PICK_INTERNAL_OLIGO=0 +PRIMER_PICK_RIGHT_PRIMER=1 +PRIMER_OPT_SIZE=20 +PRIMER_MIN_SIZE=18 +PRIMER_MAX_SIZE=22 +PRIMER_PRODUCT_SIZE_RANGE=75-150 +PRIMER_EXPLAIN_FLAG=1 += +SEQUENCE_ID=y,601 +SEQUENCE_TEMPLATE=GACCCAGAGGGCTCACCCAGAGTCGAGGCTCAAGGACAGCTCTCCTTTGTGTCCAGAGTGTATACGATGTAACTCTGTTCGGGCACTGGTGAAAGATAACAGAGGAAATGCCTGGCTTTTTATCAGAACATGTTTCCAAGCTTATCCCTTTTCCCAGCTCTCCTTGTCCCTCCCAAGATCTCTTCACTGGCCTCTTATCTTTACTGTTACCAAATCTTTCCAGAAGCTGCTCTTTCCCTCAATTGTTCATTTGTCTTCTTGTCCAGGAATGAACCACTGCTCTCTTCTTGTCAGATCAGCTTCTCATCCCTCCTCAAGGGCCTTTAACTACTCCACATCCAAAGCTACCCAGGCCATTTTAAGTTTCCTGTGGACTAAGGACAAAGGTGCGGGGAGATGA +PRIMER_TASK=generic +PRIMER_PICK_LEFT_PRIMER=1 +PRIMER_PICK_INTERNAL_OLIGO=0 +PRIMER_PICK_RIGHT_PRIMER=1 +PRIMER_OPT_SIZE=20 +PRIMER_MIN_SIZE=18 +PRIMER_MAX_SIZE=22 +PRIMER_PRODUCT_SIZE_RANGE=75-150 +PRIMER_EXPLAIN_FLAG=1 += diff --git a/test/primers/y.primer3_with_ref_pos.out b/test/primers/y.primer3_with_ref_pos.out new file mode 100644 index 0000000000..daf5dce0c0 --- /dev/null +++ b/test/primers/y.primer3_with_ref_pos.out @@ -0,0 +1,134 @@ +SEQUENCE_ID=y|gene|feature|0 +SEQUENCE_TEMPLATE=CAAATAAGGCTTGGAAATTTTCTGGAGTTCTATTATATTCCAACTCTCTGGTTCCTGGTGCTATGTGTAACTAGTAATGGTAATGGATATGTTGGGCTTTTTTCTTTGATTTATTTGAAGTGACGTTTGACAATCTATCACTAGGGGTAATGTGGGGAAATGGAAAGAATACAAGATTTGGAGCCAGACAAATCTGGGTTCAAATCCTCACTTTGCCACATATTAGCCATGTGACTTTGAACAAGTTAGTTAATCTCTCTGAACTTCAGTTTAATTATCTCTAATATGGAGATGATACTACTGACAGCAGAGGTTTGCTGTGAAGATTAAATTAGGTGATGCTTGTAAAGCTCAGGGAATAGTGCCTGGCATAGAGGAAAGCCTCTGACAACTGGTAGTTACTGTTATTTACTATGAATCCTCACCTTCCTTGACTTCTTGAAACATTTGGCTATTGACCTCTTTCCTCCTTGAGGCTCTTCTGGCTTTTCATTGTCAACACAGTCAACGCTCAATACAAGGGACATTAGGATTGGCAGTAGCTCAGAGATCTCTCTGCTCACCGTGATCTTCAAGTTTGAAAATTGCATCTCAAATCTAAGACCCAGAGGGCTCACCCAGAGTCGAGGCTCAAGGACAGCTCTCCTTTGTGTCCAGAGTGTATACGATGTAACTCTGTTCGGGCACTGGTGAAAGATAACAGAGGAAATGCCTGGCTTTTTATCAGAACATGTTTCCAAGCTTATCCCTTTTCCCAGCTCTCCTTGTCCCTCCCAAGATCTCTTCACTGGCCTCTTATCTTTACTGTTACCAAATCTTTCCAGAAGCTGCTCTTTCCCTCAATTGTTCATTTGTCTTCTTGTCCAGGAATGAACCACTGCTCTCTTCTTGTCAGATCAGCTTCTCATCCCTCCTCAAGGGCCTTTAACTACTCCACATCCAAAGCTACCCAGGCCATTTTAAGTTTCCTGTGGACTAAGGACAAAGGTGCGGGGAGATGA +PRIMER_TASK=generic +PRIMER_PICK_LEFT_PRIMER=1 +PRIMER_PICK_INTERNAL_OLIGO=0 +PRIMER_PICK_RIGHT_PRIMER=1 +PRIMER_OPT_SIZE=20 +PRIMER_MIN_SIZE=18 +PRIMER_MAX_SIZE=22 +PRIMER_PRODUCT_SIZE_RANGE=75-150 +PRIMER_EXPLAIN_FLAG=1 +PRIMER_LEFT_EXPLAIN=considered 4586, GC content failed 41, low tm 3329, high tm 93, high hairpin stability 6, long poly-x seq 15, ok 1102 +PRIMER_RIGHT_EXPLAIN=considered 4585, GC content failed 40, low tm 3257, high tm 106, long poly-x seq 15, ok 1167 +PRIMER_PAIR_EXPLAIN=considered 106, unacceptable product size 101, ok 5 +PRIMER_LEFT_NUM_RETURNED=5 +PRIMER_RIGHT_NUM_RETURNED=5 +PRIMER_INTERNAL_NUM_RETURNED=0 +PRIMER_PAIR_NUM_RETURNED=5 +PRIMER_PAIR_0_PENALTY=0.214768 +PRIMER_LEFT_0_PENALTY=0.107017 +PRIMER_RIGHT_0_PENALTY=0.107752 +PRIMER_LEFT_0_SEQUENCE=TGCCTGGCATAGAGGAAAGC +PRIMER_RIGHT_0_SEQUENCE=GCCAGAAGAGCCTCAAGGAG +PRIMER_LEFT_0=362,20 +PRIMER_RIGHT_0=485,20 +PRIMER_LEFT_0_TM=60.107 +PRIMER_RIGHT_0_TM=60.108 +PRIMER_LEFT_0_GC_PERCENT=55.000 +PRIMER_RIGHT_0_GC_PERCENT=60.000 +PRIMER_LEFT_0_SELF_ANY_TH=19.30 +PRIMER_RIGHT_0_SELF_ANY_TH=0.00 +PRIMER_LEFT_0_SELF_END_TH=0.00 +PRIMER_RIGHT_0_SELF_END_TH=0.00 +PRIMER_LEFT_0_HAIRPIN_TH=31.71 +PRIMER_RIGHT_0_HAIRPIN_TH=37.39 +PRIMER_LEFT_0_END_STABILITY=3.5100 +PRIMER_RIGHT_0_END_STABILITY=3.6900 +PRIMER_PAIR_0_COMPL_ANY_TH=6.57 +PRIMER_PAIR_0_COMPL_END_TH=4.13 +PRIMER_PAIR_0_PRODUCT_SIZE=124 +PRIMER_PAIR_0_PRODUCT_TM=81.8 +PRIMER_PAIR_1_PENALTY=0.351214 +PRIMER_LEFT_1_PENALTY=0.172352 +PRIMER_RIGHT_1_PENALTY=0.178861 +PRIMER_LEFT_1_SEQUENCE=GAGTCGAGGCTCAAGGACAG +PRIMER_RIGHT_1_SEQUENCE=AGGAGAGCTGGGAAAAGGGA +PRIMER_LEFT_1=620,20 +PRIMER_RIGHT_1=764,20 +PRIMER_LEFT_1_TM=59.828 +PRIMER_RIGHT_1_TM=60.179 +PRIMER_LEFT_1_GC_PERCENT=60.000 +PRIMER_RIGHT_1_GC_PERCENT=55.000 +PRIMER_LEFT_1_SELF_ANY_TH=15.95 +PRIMER_RIGHT_1_SELF_ANY_TH=0.00 +PRIMER_LEFT_1_SELF_END_TH=0.00 +PRIMER_RIGHT_1_SELF_END_TH=0.00 +PRIMER_LEFT_1_HAIRPIN_TH=35.67 +PRIMER_RIGHT_1_HAIRPIN_TH=0.00 +PRIMER_LEFT_1_END_STABILITY=3.5100 +PRIMER_RIGHT_1_END_STABILITY=4.2000 +PRIMER_PAIR_1_COMPL_ANY_TH=0.00 +PRIMER_PAIR_1_COMPL_END_TH=0.00 +PRIMER_PAIR_1_PRODUCT_SIZE=145 +PRIMER_PAIR_1_PRODUCT_TM=83.5 +PRIMER_PAIR_2_PENALTY=0.351214 +PRIMER_LEFT_2_PENALTY=0.172352 +PRIMER_RIGHT_2_PENALTY=0.178861 +PRIMER_LEFT_2_SEQUENCE=CAGAGTCGAGGCTCAAGGAC +PRIMER_RIGHT_2_SEQUENCE=AGGAGAGCTGGGAAAAGGGA +PRIMER_LEFT_2=618,20 +PRIMER_RIGHT_2=764,20 +PRIMER_LEFT_2_TM=59.828 +PRIMER_RIGHT_2_TM=60.179 +PRIMER_LEFT_2_GC_PERCENT=60.000 +PRIMER_RIGHT_2_GC_PERCENT=55.000 +PRIMER_LEFT_2_SELF_ANY_TH=15.95 +PRIMER_RIGHT_2_SELF_ANY_TH=0.00 +PRIMER_LEFT_2_SELF_END_TH=13.47 +PRIMER_RIGHT_2_SELF_END_TH=0.00 +PRIMER_LEFT_2_HAIRPIN_TH=37.94 +PRIMER_RIGHT_2_HAIRPIN_TH=0.00 +PRIMER_LEFT_2_END_STABILITY=3.8500 +PRIMER_RIGHT_2_END_STABILITY=4.2000 +PRIMER_PAIR_2_COMPL_ANY_TH=0.00 +PRIMER_PAIR_2_COMPL_END_TH=0.00 +PRIMER_PAIR_2_PRODUCT_SIZE=147 +PRIMER_PAIR_2_PRODUCT_TM=83.6 +PRIMER_PAIR_3_PENALTY=0.354392 +PRIMER_LEFT_3_PENALTY=0.175531 +PRIMER_RIGHT_3_PENALTY=0.178861 +PRIMER_LEFT_3_SEQUENCE=GAGGCTCAAGGACAGCTCTC +PRIMER_RIGHT_3_SEQUENCE=AGGAGAGCTGGGAAAAGGGA +PRIMER_LEFT_3=625,20 +PRIMER_RIGHT_3=764,20 +PRIMER_LEFT_3_TM=59.824 +PRIMER_RIGHT_3_TM=60.179 +PRIMER_LEFT_3_GC_PERCENT=60.000 +PRIMER_RIGHT_3_GC_PERCENT=55.000 +PRIMER_LEFT_3_SELF_ANY_TH=10.25 +PRIMER_RIGHT_3_SELF_ANY_TH=0.00 +PRIMER_LEFT_3_SELF_END_TH=0.00 +PRIMER_RIGHT_3_SELF_END_TH=0.00 +PRIMER_LEFT_3_HAIRPIN_TH=37.05 +PRIMER_RIGHT_3_HAIRPIN_TH=0.00 +PRIMER_LEFT_3_END_STABILITY=3.2000 +PRIMER_RIGHT_3_END_STABILITY=4.2000 +PRIMER_PAIR_3_COMPL_ANY_TH=26.57 +PRIMER_PAIR_3_COMPL_END_TH=26.57 +PRIMER_PAIR_3_PRODUCT_SIZE=140 +PRIMER_PAIR_3_PRODUCT_TM=83.2 +PRIMER_PAIR_4_PENALTY=0.360353 +PRIMER_LEFT_4_PENALTY=0.326264 +PRIMER_RIGHT_4_PENALTY=0.034089 +PRIMER_LEFT_4_SEQUENCE=TCCAGAAGCTGCTCTTTCCC +PRIMER_RIGHT_4_SEQUENCE=GCCTGGGTAGCTTTGGATGT +PRIMER_LEFT_4=819,20 +PRIMER_RIGHT_4=954,20 +PRIMER_LEFT_4_TM=59.674 +PRIMER_RIGHT_4_TM=60.034 +PRIMER_LEFT_4_GC_PERCENT=55.000 +PRIMER_RIGHT_4_GC_PERCENT=55.000 +PRIMER_LEFT_4_SELF_ANY_TH=1.00 +PRIMER_RIGHT_4_SELF_ANY_TH=0.00 +PRIMER_LEFT_4_SELF_END_TH=0.00 +PRIMER_RIGHT_4_SELF_END_TH=0.00 +PRIMER_LEFT_4_HAIRPIN_TH=34.56 +PRIMER_RIGHT_4_HAIRPIN_TH=0.00 +PRIMER_LEFT_4_END_STABILITY=3.9700 +PRIMER_RIGHT_4_END_STABILITY=3.0600 +PRIMER_PAIR_4_COMPL_ANY_TH=13.72 +PRIMER_PAIR_4_COMPL_END_TH=10.53 +PRIMER_PAIR_4_PRODUCT_SIZE=136 +PRIMER_PAIR_4_PRODUCT_TM=83.6 += diff --git a/test/primers/y.primer3_with_ref_pos_11.nopath.out b/test/primers/y.primer3_with_ref_pos_11.nopath.out new file mode 100644 index 0000000000..095274bc04 --- /dev/null +++ b/test/primers/y.primer3_with_ref_pos_11.nopath.out @@ -0,0 +1,134 @@ +SEQUENCE_ID=x|gene|feature|11 +SEQUENCE_TEMPLATE=TGGAAATTTTCTGGAGTTCTATTATATTCCAACTCTCTGGTTCCTGGTGCTATGTGTAACTAGTAATGGTAATGGATATGTTGGGCTTTTTTCTTTGATTTATTTGAAGTGACGTTTGACAATCTATCACTAGGGGTAATGTGGGGAAATGGAAAGAATACAAGATTTGGAGCCAGACAAATCTGGGTTCAAATCCTCACTTTGCCACATATTAGCCATGTGACTTTGAACAAGTTAGTTAATCTCTCTGAACTTCAGTTTAATTATCTCTAATATGGAGATGATACTACTGACAGCAGAGGTTTGCTGTGAAGATTAAATTAGGTGATGCTTGTAAAGCTCAGGGAATAGTGCCTGGCATAGAGGAAAGCCTCTGACAACTGGTAGTTACTGTTATTTACTATGAATCCTCACCTTCCTTGACTTCTTGAAACATTTGGCTATTGACCTCTTTCCTCCTTGAGGCTCTTCTGGCTTTTCATTGTCAACACAGTCAACGCTCAATACAAGGGACATTAGGATTGGCAGTAGCTCAGAGATCTCTCTGCTCACCGTGATCTTCAAGTTTGAAAATTGCATCTCAAATCTAAGACCCAGAGGGCTCACCCAGAGTCGAGGCTCAAGGACAGCTCTCCTTTGTGTCCAGAGTGTATACGATGTAACTCTGTTCGGGCACTGGTGAAAGATAACAGAGGAAATGCCTGGCTTTTTATCAGAACATGTTTCCAAGCTTATCCCTTTTCCCAGCTCTCCTTGTCCCTCCCAAGATCTCTTCACTGGCCTCTTATCTTTACTGTTACCAAATCTTTCCAGAAGCTGCTCTTTCCCTCAATTGTTCATTTGTCTTCTTGTCCAGGAATGAACCACTGCTCTCTTCTTGTCAGATCAGCTTCTCATCCCTCCTCAAGGGCCTTTAACTACTCCACATCCAAAGCTACCCAGGCCATTTTAAGTTTCCTGTGGACTAAGGACAAAGGTGCGGGGAGATGA +PRIMER_TASK=generic +PRIMER_PICK_LEFT_PRIMER=1 +PRIMER_PICK_INTERNAL_OLIGO=0 +PRIMER_PICK_RIGHT_PRIMER=1 +PRIMER_OPT_SIZE=20 +PRIMER_MIN_SIZE=18 +PRIMER_MAX_SIZE=22 +PRIMER_PRODUCT_SIZE_RANGE=75-150 +PRIMER_EXPLAIN_FLAG=1 +PRIMER_LEFT_EXPLAIN=considered 4531, GC content failed 41, low tm 3277, high tm 93, high hairpin stability 6, long poly-x seq 15, ok 1099 +PRIMER_RIGHT_EXPLAIN=considered 4530, GC content failed 40, low tm 3203, high tm 106, long poly-x seq 15, ok 1166 +PRIMER_PAIR_EXPLAIN=considered 106, unacceptable product size 101, ok 5 +PRIMER_LEFT_NUM_RETURNED=5 +PRIMER_RIGHT_NUM_RETURNED=5 +PRIMER_INTERNAL_NUM_RETURNED=0 +PRIMER_PAIR_NUM_RETURNED=5 +PRIMER_PAIR_0_PENALTY=0.214768 +PRIMER_LEFT_0_PENALTY=0.107017 +PRIMER_RIGHT_0_PENALTY=0.107752 +PRIMER_LEFT_0_SEQUENCE=TGCCTGGCATAGAGGAAAGC +PRIMER_RIGHT_0_SEQUENCE=GCCAGAAGAGCCTCAAGGAG +PRIMER_LEFT_0=351,20 +PRIMER_RIGHT_0=474,20 +PRIMER_LEFT_0_TM=60.107 +PRIMER_RIGHT_0_TM=60.108 +PRIMER_LEFT_0_GC_PERCENT=55.000 +PRIMER_RIGHT_0_GC_PERCENT=60.000 +PRIMER_LEFT_0_SELF_ANY_TH=19.30 +PRIMER_RIGHT_0_SELF_ANY_TH=0.00 +PRIMER_LEFT_0_SELF_END_TH=0.00 +PRIMER_RIGHT_0_SELF_END_TH=0.00 +PRIMER_LEFT_0_HAIRPIN_TH=31.71 +PRIMER_RIGHT_0_HAIRPIN_TH=37.39 +PRIMER_LEFT_0_END_STABILITY=3.5100 +PRIMER_RIGHT_0_END_STABILITY=3.6900 +PRIMER_PAIR_0_COMPL_ANY_TH=6.57 +PRIMER_PAIR_0_COMPL_END_TH=4.13 +PRIMER_PAIR_0_PRODUCT_SIZE=124 +PRIMER_PAIR_0_PRODUCT_TM=81.8 +PRIMER_PAIR_1_PENALTY=0.351214 +PRIMER_LEFT_1_PENALTY=0.172352 +PRIMER_RIGHT_1_PENALTY=0.178861 +PRIMER_LEFT_1_SEQUENCE=GAGTCGAGGCTCAAGGACAG +PRIMER_RIGHT_1_SEQUENCE=AGGAGAGCTGGGAAAAGGGA +PRIMER_LEFT_1=609,20 +PRIMER_RIGHT_1=753,20 +PRIMER_LEFT_1_TM=59.828 +PRIMER_RIGHT_1_TM=60.179 +PRIMER_LEFT_1_GC_PERCENT=60.000 +PRIMER_RIGHT_1_GC_PERCENT=55.000 +PRIMER_LEFT_1_SELF_ANY_TH=15.95 +PRIMER_RIGHT_1_SELF_ANY_TH=0.00 +PRIMER_LEFT_1_SELF_END_TH=0.00 +PRIMER_RIGHT_1_SELF_END_TH=0.00 +PRIMER_LEFT_1_HAIRPIN_TH=35.67 +PRIMER_RIGHT_1_HAIRPIN_TH=0.00 +PRIMER_LEFT_1_END_STABILITY=3.5100 +PRIMER_RIGHT_1_END_STABILITY=4.2000 +PRIMER_PAIR_1_COMPL_ANY_TH=0.00 +PRIMER_PAIR_1_COMPL_END_TH=0.00 +PRIMER_PAIR_1_PRODUCT_SIZE=145 +PRIMER_PAIR_1_PRODUCT_TM=83.5 +PRIMER_PAIR_2_PENALTY=0.351214 +PRIMER_LEFT_2_PENALTY=0.172352 +PRIMER_RIGHT_2_PENALTY=0.178861 +PRIMER_LEFT_2_SEQUENCE=CAGAGTCGAGGCTCAAGGAC +PRIMER_RIGHT_2_SEQUENCE=AGGAGAGCTGGGAAAAGGGA +PRIMER_LEFT_2=607,20 +PRIMER_RIGHT_2=753,20 +PRIMER_LEFT_2_TM=59.828 +PRIMER_RIGHT_2_TM=60.179 +PRIMER_LEFT_2_GC_PERCENT=60.000 +PRIMER_RIGHT_2_GC_PERCENT=55.000 +PRIMER_LEFT_2_SELF_ANY_TH=15.95 +PRIMER_RIGHT_2_SELF_ANY_TH=0.00 +PRIMER_LEFT_2_SELF_END_TH=13.47 +PRIMER_RIGHT_2_SELF_END_TH=0.00 +PRIMER_LEFT_2_HAIRPIN_TH=37.94 +PRIMER_RIGHT_2_HAIRPIN_TH=0.00 +PRIMER_LEFT_2_END_STABILITY=3.8500 +PRIMER_RIGHT_2_END_STABILITY=4.2000 +PRIMER_PAIR_2_COMPL_ANY_TH=0.00 +PRIMER_PAIR_2_COMPL_END_TH=0.00 +PRIMER_PAIR_2_PRODUCT_SIZE=147 +PRIMER_PAIR_2_PRODUCT_TM=83.6 +PRIMER_PAIR_3_PENALTY=0.354392 +PRIMER_LEFT_3_PENALTY=0.175531 +PRIMER_RIGHT_3_PENALTY=0.178861 +PRIMER_LEFT_3_SEQUENCE=GAGGCTCAAGGACAGCTCTC +PRIMER_RIGHT_3_SEQUENCE=AGGAGAGCTGGGAAAAGGGA +PRIMER_LEFT_3=614,20 +PRIMER_RIGHT_3=753,20 +PRIMER_LEFT_3_TM=59.824 +PRIMER_RIGHT_3_TM=60.179 +PRIMER_LEFT_3_GC_PERCENT=60.000 +PRIMER_RIGHT_3_GC_PERCENT=55.000 +PRIMER_LEFT_3_SELF_ANY_TH=10.25 +PRIMER_RIGHT_3_SELF_ANY_TH=0.00 +PRIMER_LEFT_3_SELF_END_TH=0.00 +PRIMER_RIGHT_3_SELF_END_TH=0.00 +PRIMER_LEFT_3_HAIRPIN_TH=37.05 +PRIMER_RIGHT_3_HAIRPIN_TH=0.00 +PRIMER_LEFT_3_END_STABILITY=3.2000 +PRIMER_RIGHT_3_END_STABILITY=4.2000 +PRIMER_PAIR_3_COMPL_ANY_TH=26.57 +PRIMER_PAIR_3_COMPL_END_TH=26.57 +PRIMER_PAIR_3_PRODUCT_SIZE=140 +PRIMER_PAIR_3_PRODUCT_TM=83.2 +PRIMER_PAIR_4_PENALTY=0.360353 +PRIMER_LEFT_4_PENALTY=0.326264 +PRIMER_RIGHT_4_PENALTY=0.034089 +PRIMER_LEFT_4_SEQUENCE=TCCAGAAGCTGCTCTTTCCC +PRIMER_RIGHT_4_SEQUENCE=GCCTGGGTAGCTTTGGATGT +PRIMER_LEFT_4=808,20 +PRIMER_RIGHT_4=943,20 +PRIMER_LEFT_4_TM=59.674 +PRIMER_RIGHT_4_TM=60.034 +PRIMER_LEFT_4_GC_PERCENT=55.000 +PRIMER_RIGHT_4_GC_PERCENT=55.000 +PRIMER_LEFT_4_SELF_ANY_TH=1.00 +PRIMER_RIGHT_4_SELF_ANY_TH=0.00 +PRIMER_LEFT_4_SELF_END_TH=0.00 +PRIMER_RIGHT_4_SELF_END_TH=0.00 +PRIMER_LEFT_4_HAIRPIN_TH=34.56 +PRIMER_RIGHT_4_HAIRPIN_TH=0.00 +PRIMER_LEFT_4_END_STABILITY=3.9700 +PRIMER_RIGHT_4_END_STABILITY=3.0600 +PRIMER_PAIR_4_COMPL_ANY_TH=13.72 +PRIMER_PAIR_4_COMPL_END_TH=10.53 +PRIMER_PAIR_4_PRODUCT_SIZE=136 +PRIMER_PAIR_4_PRODUCT_TM=83.6 += diff --git a/test/primers/y.primer3_with_ref_pos_11.out b/test/primers/y.primer3_with_ref_pos_11.out new file mode 100644 index 0000000000..952f6b1ec1 --- /dev/null +++ b/test/primers/y.primer3_with_ref_pos_11.out @@ -0,0 +1,134 @@ +SEQUENCE_ID=y|gene|feature|11 +SEQUENCE_TEMPLATE=TGGAAATTTTCTGGAGTTCTATTATATTCCAACTCTCTGGTTCCTGGTGCTATGTGTAACTAGTAATGGTAATGGATATGTTGGGCTTTTTTCTTTGATTTATTTGAAGTGACGTTTGACAATCTATCACTAGGGGTAATGTGGGGAAATGGAAAGAATACAAGATTTGGAGCCAGACAAATCTGGGTTCAAATCCTCACTTTGCCACATATTAGCCATGTGACTTTGAACAAGTTAGTTAATCTCTCTGAACTTCAGTTTAATTATCTCTAATATGGAGATGATACTACTGACAGCAGAGGTTTGCTGTGAAGATTAAATTAGGTGATGCTTGTAAAGCTCAGGGAATAGTGCCTGGCATAGAGGAAAGCCTCTGACAACTGGTAGTTACTGTTATTTACTATGAATCCTCACCTTCCTTGACTTCTTGAAACATTTGGCTATTGACCTCTTTCCTCCTTGAGGCTCTTCTGGCTTTTCATTGTCAACACAGTCAACGCTCAATACAAGGGACATTAGGATTGGCAGTAGCTCAGAGATCTCTCTGCTCACCGTGATCTTCAAGTTTGAAAATTGCATCTCAAATCTAAGACCCAGAGGGCTCACCCAGAGTCGAGGCTCAAGGACAGCTCTCCTTTGTGTCCAGAGTGTATACGATGTAACTCTGTTCGGGCACTGGTGAAAGATAACAGAGGAAATGCCTGGCTTTTTATCAGAACATGTTTCCAAGCTTATCCCTTTTCCCAGCTCTCCTTGTCCCTCCCAAGATCTCTTCACTGGCCTCTTATCTTTACTGTTACCAAATCTTTCCAGAAGCTGCTCTTTCCCTCAATTGTTCATTTGTCTTCTTGTCCAGGAATGAACCACTGCTCTCTTCTTGTCAGATCAGCTTCTCATCCCTCCTCAAGGGCCTTTAACTACTCCACATCCAAAGCTACCCAGGCCATTTTAAGTTTCCTGTGGACTAAGGACAAAGGTGCGGGGAGATGA +PRIMER_TASK=generic +PRIMER_PICK_LEFT_PRIMER=1 +PRIMER_PICK_INTERNAL_OLIGO=0 +PRIMER_PICK_RIGHT_PRIMER=1 +PRIMER_OPT_SIZE=20 +PRIMER_MIN_SIZE=18 +PRIMER_MAX_SIZE=22 +PRIMER_PRODUCT_SIZE_RANGE=75-150 +PRIMER_EXPLAIN_FLAG=1 +PRIMER_LEFT_EXPLAIN=considered 4531, GC content failed 41, low tm 3277, high tm 93, high hairpin stability 6, long poly-x seq 15, ok 1099 +PRIMER_RIGHT_EXPLAIN=considered 4530, GC content failed 40, low tm 3203, high tm 106, long poly-x seq 15, ok 1166 +PRIMER_PAIR_EXPLAIN=considered 106, unacceptable product size 101, ok 5 +PRIMER_LEFT_NUM_RETURNED=5 +PRIMER_RIGHT_NUM_RETURNED=5 +PRIMER_INTERNAL_NUM_RETURNED=0 +PRIMER_PAIR_NUM_RETURNED=5 +PRIMER_PAIR_0_PENALTY=0.214768 +PRIMER_LEFT_0_PENALTY=0.107017 +PRIMER_RIGHT_0_PENALTY=0.107752 +PRIMER_LEFT_0_SEQUENCE=TGCCTGGCATAGAGGAAAGC +PRIMER_RIGHT_0_SEQUENCE=GCCAGAAGAGCCTCAAGGAG +PRIMER_LEFT_0=351,20 +PRIMER_RIGHT_0=474,20 +PRIMER_LEFT_0_TM=60.107 +PRIMER_RIGHT_0_TM=60.108 +PRIMER_LEFT_0_GC_PERCENT=55.000 +PRIMER_RIGHT_0_GC_PERCENT=60.000 +PRIMER_LEFT_0_SELF_ANY_TH=19.30 +PRIMER_RIGHT_0_SELF_ANY_TH=0.00 +PRIMER_LEFT_0_SELF_END_TH=0.00 +PRIMER_RIGHT_0_SELF_END_TH=0.00 +PRIMER_LEFT_0_HAIRPIN_TH=31.71 +PRIMER_RIGHT_0_HAIRPIN_TH=37.39 +PRIMER_LEFT_0_END_STABILITY=3.5100 +PRIMER_RIGHT_0_END_STABILITY=3.6900 +PRIMER_PAIR_0_COMPL_ANY_TH=6.57 +PRIMER_PAIR_0_COMPL_END_TH=4.13 +PRIMER_PAIR_0_PRODUCT_SIZE=124 +PRIMER_PAIR_0_PRODUCT_TM=81.8 +PRIMER_PAIR_1_PENALTY=0.351214 +PRIMER_LEFT_1_PENALTY=0.172352 +PRIMER_RIGHT_1_PENALTY=0.178861 +PRIMER_LEFT_1_SEQUENCE=GAGTCGAGGCTCAAGGACAG +PRIMER_RIGHT_1_SEQUENCE=AGGAGAGCTGGGAAAAGGGA +PRIMER_LEFT_1=609,20 +PRIMER_RIGHT_1=753,20 +PRIMER_LEFT_1_TM=59.828 +PRIMER_RIGHT_1_TM=60.179 +PRIMER_LEFT_1_GC_PERCENT=60.000 +PRIMER_RIGHT_1_GC_PERCENT=55.000 +PRIMER_LEFT_1_SELF_ANY_TH=15.95 +PRIMER_RIGHT_1_SELF_ANY_TH=0.00 +PRIMER_LEFT_1_SELF_END_TH=0.00 +PRIMER_RIGHT_1_SELF_END_TH=0.00 +PRIMER_LEFT_1_HAIRPIN_TH=35.67 +PRIMER_RIGHT_1_HAIRPIN_TH=0.00 +PRIMER_LEFT_1_END_STABILITY=3.5100 +PRIMER_RIGHT_1_END_STABILITY=4.2000 +PRIMER_PAIR_1_COMPL_ANY_TH=0.00 +PRIMER_PAIR_1_COMPL_END_TH=0.00 +PRIMER_PAIR_1_PRODUCT_SIZE=145 +PRIMER_PAIR_1_PRODUCT_TM=83.5 +PRIMER_PAIR_2_PENALTY=0.351214 +PRIMER_LEFT_2_PENALTY=0.172352 +PRIMER_RIGHT_2_PENALTY=0.178861 +PRIMER_LEFT_2_SEQUENCE=CAGAGTCGAGGCTCAAGGAC +PRIMER_RIGHT_2_SEQUENCE=AGGAGAGCTGGGAAAAGGGA +PRIMER_LEFT_2=607,20 +PRIMER_RIGHT_2=753,20 +PRIMER_LEFT_2_TM=59.828 +PRIMER_RIGHT_2_TM=60.179 +PRIMER_LEFT_2_GC_PERCENT=60.000 +PRIMER_RIGHT_2_GC_PERCENT=55.000 +PRIMER_LEFT_2_SELF_ANY_TH=15.95 +PRIMER_RIGHT_2_SELF_ANY_TH=0.00 +PRIMER_LEFT_2_SELF_END_TH=13.47 +PRIMER_RIGHT_2_SELF_END_TH=0.00 +PRIMER_LEFT_2_HAIRPIN_TH=37.94 +PRIMER_RIGHT_2_HAIRPIN_TH=0.00 +PRIMER_LEFT_2_END_STABILITY=3.8500 +PRIMER_RIGHT_2_END_STABILITY=4.2000 +PRIMER_PAIR_2_COMPL_ANY_TH=0.00 +PRIMER_PAIR_2_COMPL_END_TH=0.00 +PRIMER_PAIR_2_PRODUCT_SIZE=147 +PRIMER_PAIR_2_PRODUCT_TM=83.6 +PRIMER_PAIR_3_PENALTY=0.354392 +PRIMER_LEFT_3_PENALTY=0.175531 +PRIMER_RIGHT_3_PENALTY=0.178861 +PRIMER_LEFT_3_SEQUENCE=GAGGCTCAAGGACAGCTCTC +PRIMER_RIGHT_3_SEQUENCE=AGGAGAGCTGGGAAAAGGGA +PRIMER_LEFT_3=614,20 +PRIMER_RIGHT_3=753,20 +PRIMER_LEFT_3_TM=59.824 +PRIMER_RIGHT_3_TM=60.179 +PRIMER_LEFT_3_GC_PERCENT=60.000 +PRIMER_RIGHT_3_GC_PERCENT=55.000 +PRIMER_LEFT_3_SELF_ANY_TH=10.25 +PRIMER_RIGHT_3_SELF_ANY_TH=0.00 +PRIMER_LEFT_3_SELF_END_TH=0.00 +PRIMER_RIGHT_3_SELF_END_TH=0.00 +PRIMER_LEFT_3_HAIRPIN_TH=37.05 +PRIMER_RIGHT_3_HAIRPIN_TH=0.00 +PRIMER_LEFT_3_END_STABILITY=3.2000 +PRIMER_RIGHT_3_END_STABILITY=4.2000 +PRIMER_PAIR_3_COMPL_ANY_TH=26.57 +PRIMER_PAIR_3_COMPL_END_TH=26.57 +PRIMER_PAIR_3_PRODUCT_SIZE=140 +PRIMER_PAIR_3_PRODUCT_TM=83.2 +PRIMER_PAIR_4_PENALTY=0.360353 +PRIMER_LEFT_4_PENALTY=0.326264 +PRIMER_RIGHT_4_PENALTY=0.034089 +PRIMER_LEFT_4_SEQUENCE=TCCAGAAGCTGCTCTTTCCC +PRIMER_RIGHT_4_SEQUENCE=GCCTGGGTAGCTTTGGATGT +PRIMER_LEFT_4=808,20 +PRIMER_RIGHT_4=943,20 +PRIMER_LEFT_4_TM=59.674 +PRIMER_RIGHT_4_TM=60.034 +PRIMER_LEFT_4_GC_PERCENT=55.000 +PRIMER_RIGHT_4_GC_PERCENT=55.000 +PRIMER_LEFT_4_SELF_ANY_TH=1.00 +PRIMER_RIGHT_4_SELF_ANY_TH=0.00 +PRIMER_LEFT_4_SELF_END_TH=0.00 +PRIMER_RIGHT_4_SELF_END_TH=0.00 +PRIMER_LEFT_4_HAIRPIN_TH=34.56 +PRIMER_RIGHT_4_HAIRPIN_TH=0.00 +PRIMER_LEFT_4_END_STABILITY=3.9700 +PRIMER_RIGHT_4_END_STABILITY=3.0600 +PRIMER_PAIR_4_COMPL_ANY_TH=13.72 +PRIMER_PAIR_4_COMPL_END_TH=10.53 +PRIMER_PAIR_4_PRODUCT_SIZE=136 +PRIMER_PAIR_4_PRODUCT_TM=83.6 += diff --git a/test/primers/y.primer3config.split b/test/primers/y.primer3config.split new file mode 100644 index 0000000000..046319a2f2 --- /dev/null +++ b/test/primers/y.primer3config.split @@ -0,0 +1,24 @@ +SEQUENCE_ID=y|gene|feature|11 +SEQUENCE_TEMPLATE=TGGAAATTTTCTGGAGTTCTATTATATTCCAACTCTCTGGTTCCTGGTGCTATGTGTAACTAGTAATGGTAATGGATATGTTGGGCTTTTTTCTTTGATTTATTTGAAGTGACGTTTGACAATCTATCACTAGGGGTAATGTGGGGAAATGGAAAGAATACAAGATTTGGAGCCAGACAAATCTGGGTTCAAATCCTCACTTTGCCACATATTAGCCATGTGACTTTGAACAAGTTAGTTAATCTCTCTGAACTTCAGTTTAATTATCTCTAATATGGAGATGATACTACTGACAGCAGAGGTTTGCTGTGAAGATTAAATTAGGTGATGCTTGTAAAGCTCAGGGAATAGTGCCTGGCATAGAGGAAAGCCTCTGACAACTGGTAGTTACTGTTATTTACTATGAATCCTCACCTTCCTTGACTTCTTGAAACATTTGGCTATTGACCTCTTTCCTCCTTGAGGCTCTTCTGGCTTTTCATTGTCAACACAGTCAACGCTCAATACAAGGGACATTAGGATTGGCAGTAGCTCAGAGATCTCTCTGCTCACCGTGATCTTCAAGTTTGAAAATTGCATCTCAAATCTAAGACCCAGAGGGCTCACCCAGAGTCGAGGCTCAAGGACAGCTCTCCTTTGTGTCCAGAGTGTATACGATGTAACTCTGTTCGGGCACTGGTGAAAGATAACAGAGGAAATGCCTGGCTTTTTATCAGAACATGTTTCCAAGCTTATCCCTTTTCCCAGCTCTCCTTGTCCCTCCCAAGATCTCTTCACTGGCCTCTTATCTTTACTGTTACCAAATCTTTCCAGAAGCTGCTCTTTCCCTCAATTGTTCATTTGTCTTCTTGTCCAGGAATGAACCACTGCTCTCTTCTTGTCAGATCAGCTTCTCATCCCTCCTCAAGGGCCTTTAACTACTCCACATCCAAAGCTACCCAGGCCATTTTAAGTTTCCTGTGGACTAAGGACAAAGGTGCGGGGAGATGA +PRIMER_TASK=generic +PRIMER_PICK_LEFT_PRIMER=1 +PRIMER_PICK_INTERNAL_OLIGO=0 +PRIMER_PICK_RIGHT_PRIMER=1 +PRIMER_OPT_SIZE=20 +PRIMER_MIN_SIZE=18 +PRIMER_MAX_SIZE=22 +PRIMER_PRODUCT_SIZE_RANGE=75-150 +PRIMER_EXPLAIN_FLAG=1 += +SEQUENCE_ID=y|gene|feature|0 +SEQUENCE_TEMPLATE=CAAATAAGGCTTGGAAATTTTCTGGAGTTCTATTATATTCCAACTCTCTGGTTCCTGGTGCTATGTGTAACTAGTAATGGTAATGGATATGTTGGGCTTTTTTCTTTGATTTATTTGAAGTGACGTTTGACAATCTATCACTAGGGGTAATGTGGGGAAATGGAAAGAATACAAGATTTGGAGCCAGACAAATCTGGGTTCAAATCCTCACTTTGCCACATATTAGCCATGTGACTTTGAACAAGTTAGTTAATCTCTCTGAACTTCAGTTTAATTATCTCTAATATGGAGATGATACTACTGACAGCAGAGGTTTGCTGTGAAGATTAAATTAGGTGATGCTTGTAAAGCTCAGGGAATAGTGCCTGGCATAGAGGAAAGCCTCTGACAACTGGTAGTTACTGTTATTTACTATGAATCCTCACCTTCCTTGACTTCTTGAAACATTTGGCTATTGACCTCTTTCCTCCTTGAGGCTCTTCTGGCTTTTCATTGTCAACACAGTCAACGCTCAATACAAGGGACATTAGGATTGGCAGTAGCTCAGAGATCTCTCTGCTCACCGTGATCTTCAAGTTTGAAAATTGCATCTCAAATCTAAGACCCAGAGGGCTCACCCAGAGTCGAGGCTCAAGGACAGCTCTCCTTTGTGTCCAGAGTGTATACGATGTAACTCTGTTCGGGCACTGGTGAAAGATAACAGAGGAAATGCCTGGCTTTTTATCAGAACATGTTTCCAAGCTTATCCCTTTTCCCAGCTCTCCTTGTCCCTCCCAAGATCTCTTCACTGGCCTCTTATCTTTACTGTTACCAAATCTTTCCAGAAGCTGCTCTTTCCCTCAATTGTTCATTTGTCTTCTTGTCCAGGAATGAACCACTGCTCTCTTCTTGTCAGATCAGCTTCTCATCCCTCCTCAAGGGCCTTTAACTACTCCACATCCAAAGCTACCCAGGCCATTTTAAGTTTCCTGTGGACTAAGGACAAAGGTGCGGGGAGATGA +PRIMER_TASK=generic +PRIMER_PICK_LEFT_PRIMER=1 +PRIMER_PICK_INTERNAL_OLIGO=0 +PRIMER_PICK_RIGHT_PRIMER=1 +PRIMER_OPT_SIZE=20 +PRIMER_MIN_SIZE=18 +PRIMER_MAX_SIZE=22 +PRIMER_PRODUCT_SIZE_RANGE=75-150 +PRIMER_EXPLAIN_FLAG=1 += diff --git a/test/primers/y.ref_pos_0.out b/test/primers/y.ref_pos_0.out new file mode 100644 index 0000000000..e051f4c85d --- /dev/null +++ b/test/primers/y.ref_pos_0.out @@ -0,0 +1,6 @@ +chrom tplpos lpseq rpseq lppostpl rppostmp lpposchrom rpposchrom lplen rplen linsize minsize maxsize nvprimers nvproducts +y 0 TGCCTGGCATAGAGGAAAGC GCCAGAAGAGCCTCAAGGAG 362 466 362 466 20 20 124 124 124 1 0 +y 0 GAGTCGAGGCTCAAGGACAG AGGAGAGCTGGGAAAAGGGA 620 745 620 745 20 20 145 142 145 1 0 +y 0 CAGAGTCGAGGCTCAAGGAC AGGAGAGCTGGGAAAAGGGA 618 745 618 745 20 20 147 144 147 1 0 +y 0 GAGGCTCAAGGACAGCTCTC AGGAGAGCTGGGAAAAGGGA 625 745 625 745 20 20 140 137 140 1 0 +y 0 TCCAGAAGCTGCTCTTTCCC GCCTGGGTAGCTTTGGATGT 819 935 819 935 20 20 136 135 138 1 0 diff --git a/test/primers/y.ref_pos_11.out b/test/primers/y.ref_pos_11.out new file mode 100644 index 0000000000..0f32344e26 --- /dev/null +++ b/test/primers/y.ref_pos_11.out @@ -0,0 +1,6 @@ +chrom tplpos lpseq rpseq lppostpl rppostmp lpposchrom rpposchrom lplen rplen linsize minsize maxsize nvprimers nvproducts +y_x 11 TGCCTGGCATAGAGGAAAGC GCCAGAAGAGCCTCAAGGAG 351 455 362 466 20 20 124 124 124 1 0 +y 11 GAGTCGAGGCTCAAGGACAG AGGAGAGCTGGGAAAAGGGA 609 734 620 745 20 20 145 142 145 1 0 +y 11 CAGAGTCGAGGCTCAAGGAC AGGAGAGCTGGGAAAAGGGA 607 734 618 745 20 20 147 144 147 1 0 +y 11 GAGGCTCAAGGACAGCTCTC AGGAGAGCTGGGAAAAGGGA 614 734 625 745 20 20 140 137 140 1 0 +y 11 TCCAGAAGCTGCTCTTTCCC GCCTGGGTAGCTTTGGATGT 808 924 819 935 20 20 136 135 138 1 0 diff --git a/test/primers/y.ri b/test/primers/y.ri new file mode 100644 index 0000000000..c466b0f0ac Binary files /dev/null and b/test/primers/y.ri differ diff --git a/test/primers/y.split.out b/test/primers/y.split.out new file mode 100644 index 0000000000..2cb765416a --- /dev/null +++ b/test/primers/y.split.out @@ -0,0 +1,268 @@ +SEQUENCE_ID=y|gene|feature|100 +SEQUENCE_TEMPLATE=TTTCTTTGATTTATTTGAAGTGACGTTTGACAATCTATCACTAGGGGTAATGTGGGGAAATGGAAAGAATACAAGATTTGGAGCCAGACAAATCTGGGTTCAAATCCTCACTTTGCCACATATTAGCCATGTGACTTTGAACAAGTTAGTTAATCTCTCTGAACTTCAGTTTAATTATCTCTAATATGGAGATGATACTACTGACAGCAGAGGTTTGCTGTGAAGATTAAATTAGGTGATGCTTGTAAAGCTCAGGGAATAGTGCCTGGCATAGAGGAAAGCCTCTGACAACTGGTAGTTACTGTTATTTACTATGAATCCTCACCTTCCTTGACTTCTTGAAACATTTGGCTATTGACCTCTTTCCTCCTTGAGGCTCTTCTGGCTTTTCATTGTCAACA +PRIMER_TASK=generic +PRIMER_PICK_LEFT_PRIMER=1 +PRIMER_PICK_INTERNAL_OLIGO=0 +PRIMER_PICK_RIGHT_PRIMER=1 +PRIMER_OPT_SIZE=20 +PRIMER_MIN_SIZE=18 +PRIMER_MAX_SIZE=22 +PRIMER_PRODUCT_SIZE_RANGE=75-150 +PRIMER_EXPLAIN_FLAG=1 +PRIMER_LEFT_EXPLAIN=considered 1635, GC content failed 20, low tm 1345, high tm 6, high hairpin stability 19, ok 245 +PRIMER_RIGHT_EXPLAIN=considered 1635, GC content failed 16, low tm 1303, high tm 8, high hairpin stability 3, ok 305 +PRIMER_PAIR_EXPLAIN=considered 61, unacceptable product size 55, ok 6 +PRIMER_LEFT_NUM_RETURNED=5 +PRIMER_RIGHT_NUM_RETURNED=5 +PRIMER_INTERNAL_NUM_RETURNED=0 +PRIMER_PAIR_NUM_RETURNED=5 +PRIMER_PAIR_0_PENALTY=0.214768 +PRIMER_LEFT_0_PENALTY=0.107017 +PRIMER_RIGHT_0_PENALTY=0.107752 +PRIMER_LEFT_0_SEQUENCE=TGCCTGGCATAGAGGAAAGC +PRIMER_RIGHT_0_SEQUENCE=GCCAGAAGAGCCTCAAGGAG +PRIMER_LEFT_0=262,20 +PRIMER_RIGHT_0=385,20 +PRIMER_LEFT_0_TM=60.107 +PRIMER_RIGHT_0_TM=60.108 +PRIMER_LEFT_0_GC_PERCENT=55.000 +PRIMER_RIGHT_0_GC_PERCENT=60.000 +PRIMER_LEFT_0_SELF_ANY_TH=19.30 +PRIMER_RIGHT_0_SELF_ANY_TH=0.00 +PRIMER_LEFT_0_SELF_END_TH=0.00 +PRIMER_RIGHT_0_SELF_END_TH=0.00 +PRIMER_LEFT_0_HAIRPIN_TH=31.71 +PRIMER_RIGHT_0_HAIRPIN_TH=37.39 +PRIMER_LEFT_0_END_STABILITY=3.5100 +PRIMER_RIGHT_0_END_STABILITY=3.6900 +PRIMER_PAIR_0_COMPL_ANY_TH=6.57 +PRIMER_PAIR_0_COMPL_END_TH=4.13 +PRIMER_PAIR_0_PRODUCT_SIZE=124 +PRIMER_PAIR_0_PRODUCT_TM=81.8 +PRIMER_PAIR_1_PENALTY=0.434608 +PRIMER_LEFT_1_PENALTY=0.107017 +PRIMER_RIGHT_1_PENALTY=0.327591 +PRIMER_LEFT_1_SEQUENCE=TGCCTGGCATAGAGGAAAGC +PRIMER_RIGHT_1_SEQUENCE=AAGCCAGAAGAGCCTCAAGG +PRIMER_LEFT_1=262,20 +PRIMER_RIGHT_1=387,20 +PRIMER_LEFT_1_TM=60.107 +PRIMER_RIGHT_1_TM=59.672 +PRIMER_LEFT_1_GC_PERCENT=55.000 +PRIMER_RIGHT_1_GC_PERCENT=55.000 +PRIMER_LEFT_1_SELF_ANY_TH=19.30 +PRIMER_RIGHT_1_SELF_ANY_TH=0.00 +PRIMER_LEFT_1_SELF_END_TH=0.00 +PRIMER_RIGHT_1_SELF_END_TH=0.00 +PRIMER_LEFT_1_HAIRPIN_TH=31.71 +PRIMER_RIGHT_1_HAIRPIN_TH=38.53 +PRIMER_LEFT_1_END_STABILITY=3.5100 +PRIMER_RIGHT_1_END_STABILITY=3.6100 +PRIMER_PAIR_1_COMPL_ANY_TH=11.71 +PRIMER_PAIR_1_COMPL_END_TH=4.81 +PRIMER_PAIR_1_PRODUCT_SIZE=126 +PRIMER_PAIR_1_PRODUCT_TM=81.6 +PRIMER_PAIR_2_PENALTY=0.653284 +PRIMER_LEFT_2_PENALTY=0.107017 +PRIMER_RIGHT_2_PENALTY=0.546267 +PRIMER_LEFT_2_SEQUENCE=TGCCTGGCATAGAGGAAAGC +PRIMER_RIGHT_2_SEQUENCE=AGCCAGAAGAGCCTCAAGGA +PRIMER_LEFT_2=262,20 +PRIMER_RIGHT_2=386,20 +PRIMER_LEFT_2_TM=60.107 +PRIMER_RIGHT_2_TM=60.546 +PRIMER_LEFT_2_GC_PERCENT=55.000 +PRIMER_RIGHT_2_GC_PERCENT=55.000 +PRIMER_LEFT_2_SELF_ANY_TH=19.30 +PRIMER_RIGHT_2_SELF_ANY_TH=0.00 +PRIMER_LEFT_2_SELF_END_TH=0.00 +PRIMER_RIGHT_2_SELF_END_TH=0.00 +PRIMER_LEFT_2_HAIRPIN_TH=31.71 +PRIMER_RIGHT_2_HAIRPIN_TH=37.39 +PRIMER_LEFT_2_END_STABILITY=3.5100 +PRIMER_RIGHT_2_END_STABILITY=3.3600 +PRIMER_PAIR_2_COMPL_ANY_TH=11.71 +PRIMER_PAIR_2_COMPL_END_TH=0.00 +PRIMER_PAIR_2_PRODUCT_SIZE=125 +PRIMER_PAIR_2_PRODUCT_TM=81.7 +PRIMER_PAIR_3_PENALTY=1.180717 +PRIMER_LEFT_3_PENALTY=1.072965 +PRIMER_RIGHT_3_PENALTY=0.107752 +PRIMER_LEFT_3_SEQUENCE=AGGAAAGCCTCTGACAACTGG +PRIMER_RIGHT_3_SEQUENCE=GCCAGAAGAGCCTCAAGGAG +PRIMER_LEFT_3=274,21 +PRIMER_RIGHT_3=385,20 +PRIMER_LEFT_3_TM=59.927 +PRIMER_RIGHT_3_TM=60.108 +PRIMER_LEFT_3_GC_PERCENT=52.381 +PRIMER_RIGHT_3_GC_PERCENT=60.000 +PRIMER_LEFT_3_SELF_ANY_TH=0.00 +PRIMER_RIGHT_3_SELF_ANY_TH=0.00 +PRIMER_LEFT_3_SELF_END_TH=0.00 +PRIMER_RIGHT_3_SELF_END_TH=0.00 +PRIMER_LEFT_3_HAIRPIN_TH=42.19 +PRIMER_RIGHT_3_HAIRPIN_TH=37.39 +PRIMER_LEFT_3_END_STABILITY=4.0000 +PRIMER_RIGHT_3_END_STABILITY=3.6900 +PRIMER_PAIR_3_COMPL_ANY_TH=0.00 +PRIMER_PAIR_3_COMPL_END_TH=0.00 +PRIMER_PAIR_3_PRODUCT_SIZE=112 +PRIMER_PAIR_3_PRODUCT_TM=80.7 +PRIMER_PAIR_4_PENALTY=1.310570 +PRIMER_LEFT_4_PENALTY=1.202818 +PRIMER_RIGHT_4_PENALTY=0.107752 +PRIMER_LEFT_4_SEQUENCE=GGAAAGCCTCTGACAACTGGT +PRIMER_RIGHT_4_SEQUENCE=GCCAGAAGAGCCTCAAGGAG +PRIMER_LEFT_4=275,21 +PRIMER_RIGHT_4=385,20 +PRIMER_LEFT_4_TM=60.203 +PRIMER_RIGHT_4_TM=60.108 +PRIMER_LEFT_4_GC_PERCENT=52.381 +PRIMER_RIGHT_4_GC_PERCENT=60.000 +PRIMER_LEFT_4_SELF_ANY_TH=0.00 +PRIMER_RIGHT_4_SELF_ANY_TH=0.00 +PRIMER_LEFT_4_SELF_END_TH=0.00 +PRIMER_RIGHT_4_SELF_END_TH=0.00 +PRIMER_LEFT_4_HAIRPIN_TH=0.00 +PRIMER_RIGHT_4_HAIRPIN_TH=37.39 +PRIMER_LEFT_4_END_STABILITY=4.0000 +PRIMER_RIGHT_4_END_STABILITY=3.6900 +PRIMER_PAIR_4_COMPL_ANY_TH=0.00 +PRIMER_PAIR_4_COMPL_END_TH=6.95 +PRIMER_PAIR_4_PRODUCT_SIZE=111 +PRIMER_PAIR_4_PRODUCT_TM=80.8 += +SEQUENCE_ID=y|gene|feature|601 +SEQUENCE_TEMPLATE=GACCCAGAGGGCTCACCCAGAGTCGAGGCTCAAGGACAGCTCTCCTTTGTGTCCAGAGTGTATACGATGTAACTCTGTTCGGGCACTGGTGAAAGATAACAGAGGAAATGCCTGGCTTTTTATCAGAACATGTTTCCAAGCTTATCCCTTTTCCCAGCTCTCCTTGTCCCTCCCAAGATCTCTTCACTGGCCTCTTATCTTTACTGTTACCAAATCTTTCCAGAAGCTGCTCTTTCCCTCAATTGTTCATTTGTCTTCTTGTCCAGGAATGAACCACTGCTCTCTTCTTGTCAGATCAGCTTCTCATCCCTCCTCAAGGGCCTTTAACTACTCCACATCCAAAGCTACCCAGGCCATTTTAAGTTTCCTGTGGACTAAGGACAAAGGTGCGGGGAGATGA +PRIMER_TASK=generic +PRIMER_PICK_LEFT_PRIMER=1 +PRIMER_PICK_INTERNAL_OLIGO=0 +PRIMER_PICK_RIGHT_PRIMER=1 +PRIMER_OPT_SIZE=20 +PRIMER_MIN_SIZE=18 +PRIMER_MAX_SIZE=22 +PRIMER_PRODUCT_SIZE_RANGE=75-150 +PRIMER_EXPLAIN_FLAG=1 +PRIMER_LEFT_EXPLAIN=considered 1630, low tm 1013, high tm 75, high hairpin stability 1, ok 541 +PRIMER_RIGHT_EXPLAIN=considered 1630, low tm 1066, high tm 37, ok 527 +PRIMER_PAIR_EXPLAIN=considered 23, unacceptable product size 18, ok 5 +PRIMER_LEFT_NUM_RETURNED=5 +PRIMER_RIGHT_NUM_RETURNED=5 +PRIMER_INTERNAL_NUM_RETURNED=0 +PRIMER_PAIR_NUM_RETURNED=5 +PRIMER_PAIR_0_PENALTY=0.351214 +PRIMER_LEFT_0_PENALTY=0.172352 +PRIMER_RIGHT_0_PENALTY=0.178861 +PRIMER_LEFT_0_SEQUENCE=GAGTCGAGGCTCAAGGACAG +PRIMER_RIGHT_0_SEQUENCE=AGGAGAGCTGGGAAAAGGGA +PRIMER_LEFT_0=19,20 +PRIMER_RIGHT_0=163,20 +PRIMER_LEFT_0_TM=59.828 +PRIMER_RIGHT_0_TM=60.179 +PRIMER_LEFT_0_GC_PERCENT=60.000 +PRIMER_RIGHT_0_GC_PERCENT=55.000 +PRIMER_LEFT_0_SELF_ANY_TH=15.95 +PRIMER_RIGHT_0_SELF_ANY_TH=0.00 +PRIMER_LEFT_0_SELF_END_TH=0.00 +PRIMER_RIGHT_0_SELF_END_TH=0.00 +PRIMER_LEFT_0_HAIRPIN_TH=35.67 +PRIMER_RIGHT_0_HAIRPIN_TH=0.00 +PRIMER_LEFT_0_END_STABILITY=3.5100 +PRIMER_RIGHT_0_END_STABILITY=4.2000 +PRIMER_PAIR_0_COMPL_ANY_TH=0.00 +PRIMER_PAIR_0_COMPL_END_TH=0.00 +PRIMER_PAIR_0_PRODUCT_SIZE=145 +PRIMER_PAIR_0_PRODUCT_TM=83.5 +PRIMER_PAIR_1_PENALTY=0.351214 +PRIMER_LEFT_1_PENALTY=0.172352 +PRIMER_RIGHT_1_PENALTY=0.178861 +PRIMER_LEFT_1_SEQUENCE=CAGAGTCGAGGCTCAAGGAC +PRIMER_RIGHT_1_SEQUENCE=AGGAGAGCTGGGAAAAGGGA +PRIMER_LEFT_1=17,20 +PRIMER_RIGHT_1=163,20 +PRIMER_LEFT_1_TM=59.828 +PRIMER_RIGHT_1_TM=60.179 +PRIMER_LEFT_1_GC_PERCENT=60.000 +PRIMER_RIGHT_1_GC_PERCENT=55.000 +PRIMER_LEFT_1_SELF_ANY_TH=15.95 +PRIMER_RIGHT_1_SELF_ANY_TH=0.00 +PRIMER_LEFT_1_SELF_END_TH=13.47 +PRIMER_RIGHT_1_SELF_END_TH=0.00 +PRIMER_LEFT_1_HAIRPIN_TH=37.94 +PRIMER_RIGHT_1_HAIRPIN_TH=0.00 +PRIMER_LEFT_1_END_STABILITY=3.8500 +PRIMER_RIGHT_1_END_STABILITY=4.2000 +PRIMER_PAIR_1_COMPL_ANY_TH=0.00 +PRIMER_PAIR_1_COMPL_END_TH=0.00 +PRIMER_PAIR_1_PRODUCT_SIZE=147 +PRIMER_PAIR_1_PRODUCT_TM=83.6 +PRIMER_PAIR_2_PENALTY=0.354392 +PRIMER_LEFT_2_PENALTY=0.175531 +PRIMER_RIGHT_2_PENALTY=0.178861 +PRIMER_LEFT_2_SEQUENCE=GAGGCTCAAGGACAGCTCTC +PRIMER_RIGHT_2_SEQUENCE=AGGAGAGCTGGGAAAAGGGA +PRIMER_LEFT_2=24,20 +PRIMER_RIGHT_2=163,20 +PRIMER_LEFT_2_TM=59.824 +PRIMER_RIGHT_2_TM=60.179 +PRIMER_LEFT_2_GC_PERCENT=60.000 +PRIMER_RIGHT_2_GC_PERCENT=55.000 +PRIMER_LEFT_2_SELF_ANY_TH=10.25 +PRIMER_RIGHT_2_SELF_ANY_TH=0.00 +PRIMER_LEFT_2_SELF_END_TH=0.00 +PRIMER_RIGHT_2_SELF_END_TH=0.00 +PRIMER_LEFT_2_HAIRPIN_TH=37.05 +PRIMER_RIGHT_2_HAIRPIN_TH=0.00 +PRIMER_LEFT_2_END_STABILITY=3.2000 +PRIMER_RIGHT_2_END_STABILITY=4.2000 +PRIMER_PAIR_2_COMPL_ANY_TH=26.57 +PRIMER_PAIR_2_COMPL_END_TH=26.57 +PRIMER_PAIR_2_PRODUCT_SIZE=140 +PRIMER_PAIR_2_PRODUCT_TM=83.2 +PRIMER_PAIR_3_PENALTY=0.360353 +PRIMER_LEFT_3_PENALTY=0.326264 +PRIMER_RIGHT_3_PENALTY=0.034089 +PRIMER_LEFT_3_SEQUENCE=TCCAGAAGCTGCTCTTTCCC +PRIMER_RIGHT_3_SEQUENCE=GCCTGGGTAGCTTTGGATGT +PRIMER_LEFT_3=218,20 +PRIMER_RIGHT_3=353,20 +PRIMER_LEFT_3_TM=59.674 +PRIMER_RIGHT_3_TM=60.034 +PRIMER_LEFT_3_GC_PERCENT=55.000 +PRIMER_RIGHT_3_GC_PERCENT=55.000 +PRIMER_LEFT_3_SELF_ANY_TH=1.00 +PRIMER_RIGHT_3_SELF_ANY_TH=0.00 +PRIMER_LEFT_3_SELF_END_TH=0.00 +PRIMER_RIGHT_3_SELF_END_TH=0.00 +PRIMER_LEFT_3_HAIRPIN_TH=34.56 +PRIMER_RIGHT_3_HAIRPIN_TH=0.00 +PRIMER_LEFT_3_END_STABILITY=3.9700 +PRIMER_RIGHT_3_END_STABILITY=3.0600 +PRIMER_PAIR_3_COMPL_ANY_TH=13.72 +PRIMER_PAIR_3_COMPL_END_TH=10.53 +PRIMER_PAIR_3_PRODUCT_SIZE=136 +PRIMER_PAIR_3_PRODUCT_TM=83.6 +PRIMER_PAIR_4_PENALTY=0.361680 +PRIMER_LEFT_4_PENALTY=0.327591 +PRIMER_RIGHT_4_PENALTY=0.034089 +PRIMER_LEFT_4_SEQUENCE=CCAGAAGCTGCTCTTTCCCT +PRIMER_RIGHT_4_SEQUENCE=GCCTGGGTAGCTTTGGATGT +PRIMER_LEFT_4=219,20 +PRIMER_RIGHT_4=353,20 +PRIMER_LEFT_4_TM=59.672 +PRIMER_RIGHT_4_TM=60.034 +PRIMER_LEFT_4_GC_PERCENT=55.000 +PRIMER_RIGHT_4_GC_PERCENT=55.000 +PRIMER_LEFT_4_SELF_ANY_TH=1.00 +PRIMER_RIGHT_4_SELF_ANY_TH=0.00 +PRIMER_LEFT_4_SELF_END_TH=0.00 +PRIMER_RIGHT_4_SELF_END_TH=0.00 +PRIMER_LEFT_4_HAIRPIN_TH=34.56 +PRIMER_RIGHT_4_HAIRPIN_TH=0.00 +PRIMER_LEFT_4_END_STABILITY=4.2000 +PRIMER_RIGHT_4_END_STABILITY=3.0600 +PRIMER_PAIR_4_COMPL_ANY_TH=12.02 +PRIMER_PAIR_4_COMPL_END_TH=3.44 +PRIMER_PAIR_4_PRODUCT_SIZE=135 +PRIMER_PAIR_4_PRODUCT_TM=83.8 += diff --git a/test/primers/y.xg b/test/primers/y.xg new file mode 100644 index 0000000000..4542c78671 Binary files /dev/null and b/test/primers/y.xg differ diff --git a/test/primers/y.zipcodes b/test/primers/y.zipcodes new file mode 100644 index 0000000000..fdcf9d09aa Binary files /dev/null and b/test/primers/y.zipcodes differ diff --git a/test/t/56_vg_primers.t b/test/t/56_vg_primers.t new file mode 100644 index 0000000000..d2925b960d --- /dev/null +++ b/test/t/56_vg_primers.t @@ -0,0 +1,37 @@ +#!/usr/bin/env bash + +BASH_TAP_ROOT=../deps/bash-tap +. ../deps/bash-tap/bash-tap-bootstrap + +PATH=../bin:$PATH # for vg + +plan tests 11 + +# make graph and snarl dist index +vg construct -r small/y.fa -v small/y.vcf.gz > y.vg + +vg convert -x y.vg > y.xg + +vg index -j y.dist y.vg + +is $(vg primers primers/y.primer3_with_ref_pos.out -x y.xg -d y.dist -r primers/y.ri -g primers/y.giraffe.gbz | wc -l) 6 "Get the expected number of primer pairs" +is $(vg primers primers/y.primer3_with_ref_pos.out -x y.xg -d y.dist -r primers/y.ri -g primers/y.giraffe.gbz -a | wc -l) 6 "Get the expected number of primer pairs using --all-primers tag" +is $(vg primers primers/y.primer3_with_ref_pos.out -x y.xg -d y.dist -r primers/y.ri -g primers/y.giraffe.gbz -l 2 | wc -l) 3 "Get the expected number of primer pairs using --tolerance tag" +is $(vg primers primers/y.primer3_with_ref_pos.out -x y.xg -d y.dist -r primers/y.ri -g primers/y.giraffe.gbz -n 137 | wc -l) 4 "Get the expected number of primer pairs using --minimum-size tag" +is $(vg primers primers/y.primer3_with_ref_pos.out -x y.xg -d y.dist -r primers/y.ri -g primers/y.giraffe.gbz -m 140 | wc -l) 4 "Get the expected number of primer pairs using --maximum-size tag" + +is $(vg primers primers/y.split.out -x y.xg -d y.dist -r primers/y.ri -g primers/y.giraffe.gbz | wc -l) 9 "Get the expected number of primer pairs" +is $(vg primers primers/y.split.out -x y.xg -d y.dist -r primers/y.ri -g primers/y.giraffe.gbz -a | wc -l) 11 "Get the expected number of primer pairs using --all-primers tag" +is $(vg primers primers/y.split.out -x y.xg -d y.dist -r primers/y.ri -g primers/y.giraffe.gbz -l 2 | wc -l) 6 "Get the expected number of primer pairs using --tolerance tag" +is $(vg primers primers/y.split.out -x y.xg -d y.dist -r primers/y.ri -g primers/y.giraffe.gbz -n 137 | wc -l) 4 "Get the expected number of primer pairs using --minimum-size tag" +is $(vg primers primers/y.split.out -x y.xg -d y.dist -r primers/y.ri -g primers/y.giraffe.gbz -m 140 | wc -l) 7 "Get the expected number of primer pairs using --maximum-size tag" + +vg primers primers/y.primer3_with_ref_pos.out -x y.xg -d y.dist -r primers/y.ri -g primers/y.giraffe.gbz> y.ref_pos_0.out +vg primers primers/y.primer3_with_ref_pos_11.out -x y.xg -d y.dist -r primers/y.ri -g primers/y.giraffe.gbz > y.ref_pos_11.out +diff -q <(awk '{$2=$3=$6=$7=""; print $0}' y.ref_pos_0.out) <(awk '{$2=$3=$6=$7=""; print $0}' y.ref_pos_11.out) > diff_0_11 +is $(cat diff_0_11 | wc -l) 0 "These two output files should have identical primers except for their positions on template" + +# clean up +rm diff_0_11 +rm y.vg y.xg y.dist +rm y.ref_pos_0.out y.ref_pos_11.out