Skip to content

Commit

Permalink
intermediate
Browse files Browse the repository at this point in the history
  • Loading branch information
MarkusRainerSchmidt committed May 12, 2023
1 parent 11bcc82 commit c7ab8e2
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 20 deletions.
11 changes: 5 additions & 6 deletions libs/ms/inc/ms/util/parameter.h
Original file line number Diff line number Diff line change
Expand Up @@ -1032,10 +1032,10 @@ class GlobalParameter : public ParameterSetBase

/* Constructor */
GlobalParameter( )
: xJumpS( this, "fuzziness-s", "Maximal fuzziness for entries.", SV_PARAMETERS, 200 ),
: xJumpS( this, "fuzziness-s", "Maximal fuzziness for entries.", SV_PARAMETERS, 400 ),
xJumpSNeg( this, "fuzziness-s-neg", "Maximal fuzziness for entries.", SV_PARAMETERS,
200 ),
xJumpM( this, "fuzziness-m", "Fuzziness slope.", SV_PARAMETERS, 0.5 ),
400 ),
xJumpM( this, "fuzziness-m", "Fuzziness slope.", SV_PARAMETERS, 0.25 ),
xJumpH( this, "fuzziness-h", "Fuzziness zero-point.", SV_PARAMETERS, 25 ),
xSeedDirFuzziness( this, "Seed Dir Fuzziness", "Absolute fuzziness in seed direction.", SV_PARAMETERS, 3,
checkPositiveValue ),
Expand Down Expand Up @@ -1124,20 +1124,19 @@ class ParameterSetManager
setCommonSvParameters( xParametersSets[ "sv-illumina" ] );
xParametersSets[ "sv-illumina" ]->xDoDummyJumps->set( false );
xParametersSets[ "sv-illumina" ]->xMinReadsInCall->set( 10 );
xParametersSets[ "sv-illumina" ]->xMaxSizeEdge->set( 200 );
//xParametersSets[ "sv-illumina" ]->xMaxSizeEdge->set( 200 );
xParametersSets[ "sv-illumina" ]->xHarmScoreMin->set( 10 );
xParametersSets[ "sv-illumina" ]->xHarmScoreMinRel->set( 0.1 );
xParametersSets[ "sv-illumina" ]->xMinNtAfterReseedingRelative->set( 0.4 );
xParametersSets[ "sv-illumina" ]->xMinNtInSoc->set( 25 );
xParametersSets[ "sv-illumina" ]->xMinNtInSocRelative->set( 0.1 );

// xParametersSets[ "sv-illumina" ]->xMinSeedSizeSV->set( 16 ); @todo does this help or not?

xParametersSets.emplace( "sv-pacbio", std::make_shared<Presetting>( "SV-PacBio" ) );
setCommonSvParameters( xParametersSets[ "sv-pacbio" ] );
xParametersSets[ "sv-pacbio" ]->xSoCWidth->set( 3000 );
xParametersSets[ "sv-pacbio" ]->xMaxSizeReseed->set( 1000 );
xParametersSets[ "sv-pacbio" ]->xMinSizeEdge->set( 200 );
//xParametersSets[ "sv-pacbio" ]->xMinSizeEdge->set( 200 );
xParametersSets[ "sv-pacbio" ]->xMinNtAfterReseeding->set( 600 );
xParametersSets[ "sv-pacbio" ]->xHarmScoreMinRel->set( 0 );
xParametersSets[ "sv-pacbio" ]->xHarmScoreMin->set( 25 );
Expand Down
24 changes: 21 additions & 3 deletions libs/msv/inc/msv/container/sv_db/tables/svCall.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ template <typename DBCon> class SvCallTable : public SvCallTableType<DBCon>
SQLStatement<DBCon> xEnableExtension;
SQLStatement<DBCon> xCopyPath;
SQLStatement<DBCon> xExtractSmallCalls;
SQLStatement<DBCon> xExtractLargeCalls;

public:
// Consider: Place the table on global level
Expand Down Expand Up @@ -185,7 +186,12 @@ template <typename DBCon> class SvCallTable : public SvCallTableType<DBCon>
"UPDATE sv_call_table "
"SET sv_caller_run_id = ? "
"WHERE sv_caller_run_id = ? "
"AND GREATEST(to_pos - from_pos, inserted_sequence_size) < ? " )
"AND GREATEST(to_pos - from_pos, inserted_sequence_size) < ? " ),
xExtractLargeCalls( pConnection,
"UPDATE sv_call_table "
"SET sv_caller_run_id = ? "
"WHERE sv_caller_run_id = ? "
"AND GREATEST(to_pos - from_pos, inserted_sequence_size) >= ? " )
{} // default constructor


Expand Down Expand Up @@ -294,12 +300,24 @@ template <typename DBCon> class SvCallTable : public SvCallTableType<DBCon>
deleteCall( rCall.iId );
} // method

inline void extractSmallCalls(int64_t iCallerRunId, int64_t iMaxSize, std::string sName, std::string sDesc)
inline PriKeyDefaultType extractSmallCalls(int64_t iCallerRunId, int64_t iMaxSize, std::string sName,
std::string sDesc)
{
auto pRun = std::make_shared<SvCallerRunTable<DBCon>>( pConnection );
auto iNewId = pRun->insert( sName, sDesc, pRun->getSvJumpRunId( iCallerRunId) );
PriKeyDefaultType iNewId = pRun->insert( sName, sDesc, pRun->getSvJumpRunId( iCallerRunId) );
xExtractSmallCalls.exec( iNewId, iCallerRunId, iMaxSize );
genIndices( iNewId );
return iNewId;
} // method

/**
 * @brief Reassigns the large calls of a caller run to a freshly inserted caller run.
 * @details
 * A new caller run is inserted with the given name and description; it receives the jump-run id
 * that getSvJumpRunId reports for iCallerRunId. The xExtractLargeCalls statement then sets
 * sv_caller_run_id to the new id for every row of iCallerRunId where
 * GREATEST(to_pos - from_pos, inserted_sequence_size) >= iMinSize.
 * Finally genIndices is invoked for the new run.
 * @param iCallerRunId id of the caller run the calls are taken from.
 * @param iMinSize smallest size (inclusive) a call must have in order to be reassigned.
 * @param sName name passed to the insert of the new caller run.
 * @param sDesc description passed to the insert of the new caller run.
 * @return the value returned by the caller-run insert, i.e. the id used for the reassigned calls.
 */
inline PriKeyDefaultType extractLargeCalls( int64_t iCallerRunId, int64_t iMinSize, std::string sName,
                                            std::string sDesc )
{
    auto pCallerRunTable = std::make_shared<SvCallerRunTable<DBCon>>( pConnection );

    // the new run shares the jump run of the run it is split off from
    auto iJumpRunId = pCallerRunTable->getSvJumpRunId( iCallerRunId );
    PriKeyDefaultType iLargeCallsRunId = pCallerRunTable->insert( sName, sDesc, iJumpRunId );

    // parameter order follows the placeholders: new run id, source run id, size threshold
    xExtractLargeCalls.exec( iLargeCallsRunId, iCallerRunId, iMinSize );
    genIndices( iLargeCallsRunId );

    return iLargeCallsRunId;
} // method

inline void copyPath( int64_t iCallerRunIdFrom, int64_t iCallerRunIdTo, int64_t iAllowedDist )
Expand Down
7 changes: 7 additions & 0 deletions libs/msv/inc/msv/container/sv_db/tables/svCallerRun.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ template <typename DBCon> class SvCallerRunTable : public SvCallerRunTableType<D
std::shared_ptr<DBCon> pDatabase;
SQLStatement<DBCon> xDelete; // Discuss Markus: Shouldn't this be a statement?
SQLQuery<DBCon, int64_t> xGetId;
SQLQuery<DBCon, int64_t> xHasId;
SQLQuery<DBCon, int64_t> xGetIds;
SQLQuery<DBCon, std::string, std::string, int64_t, int64_t> xGetName;
SQLQuery<DBCon, uint64_t> xNum;
Expand All @@ -49,6 +50,7 @@ template <typename DBCon> class SvCallerRunTable : public SvCallerRunTableType<D
pDatabase( pDB ),
xDelete( pDB, "DELETE FROM sv_caller_run_table WHERE _name_ = ?" ),
xGetId( pDB, "SELECT id FROM sv_caller_run_table WHERE _name_ = ? ORDER BY time_stamp ASC LIMIT 1" ),
xHasId( pDB, "SELECT COUNT(*) FROM sv_caller_run_table WHERE _name_ = ?" ),
xGetIds( pDB, "SELECT id FROM sv_caller_run_table" ),
xGetName( pDB, "SELECT _name_, _desc_, time_stamp, "
"CASE WHEN sv_jump_run_id is NULL THEN -1 ELSE sv_jump_run_id END AS v1 "
Expand All @@ -75,6 +77,11 @@ template <typename DBCon> class SvCallerRunTable : public SvCallerRunTableType<D
return xGetId.scalar( rS );
} // method

/**
 * @brief Tells whether at least one caller run carries the given name.
 * @param rS name that the xHasId query compares against the _name_ column of sv_caller_run_table.
 * @return true if the COUNT(*) delivered by xHasId is greater than zero, false otherwise.
 */
inline bool hasName( std::string& rS )
{
    const auto iNumMatchingRuns = xHasId.scalar( rS );
    return 0 < iNumMatchingRuns;
} // method

inline std::vector<int64_t> getIds( )
{
return xGetIds.template executeAndStoreInVector<0>( );
Expand Down
22 changes: 12 additions & 10 deletions libs/msv/python/computeAccuracyRecall.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@ def compute_accuracy_recall(dataset_name, blurs, gt_ids, run_ids, out_prefix):
count_calls_from_db = SvCallsFromDb(db_conn)
for blur in blurs:
for gt_id in gt_ids:
for run_id in run_ids[gt_id]:
print("computing", blur, gt_id, run_id)
stats, gt_total = count_calls_from_db.count(run_id, gt_id, blur)
with open(out_prefix + dataset_name + "-" + str(run_id) + "-" + str(blur) + ".tsv", "w") as out_file:
out_file.write("//|ground truth| = " + str(gt_total) + "\n")
out_file.write("//#supporting reads\t#true positives\t#num entries\trecall\taccuracy\n")
for x, num_calls, num_tp in stats:
if num_calls > 0:
out_file.write(str(x) + "\t" + str(num_tp) + "\t" + str(num_calls) +
"\t" + str(num_tp/gt_total) + "\t" + str(num_tp/num_calls) + "\n")
if not gt_id is None:
for run_id in run_ids[gt_id]:
if not run_id is None:
print("computing", blur, gt_id, run_id)
stats, gt_total = count_calls_from_db.count(run_id, gt_id, blur)
with open(out_prefix + dataset_name + "-" + str(run_id) + "-" + str(blur) + ".tsv", "w") as out_file:
out_file.write("//|ground truth| = " + str(gt_total) + "\n")
out_file.write("//#supporting reads\t#true positives\t#num entries\trecall\taccuracy\n")
for x, num_calls, num_tp in stats:
if num_calls > 0:
out_file.write(str(x) + "\t" + str(num_tp) + "\t" + str(num_calls) +
"\t" + str(num_tp/gt_total) + "\t" + str(num_tp/num_calls) + "\n")
3 changes: 2 additions & 1 deletion libs/msv/python/sv_visualization/renderer/_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ def setup(self):
if not self.db_conn is None:
run_table = SvCallerRunTable(self.db_conn)
for run_id in run_table.getIds():
text = run_table.getName(run_id) + " - " + run_table.getDate(run_id) + " - " + run_table.getDesc(run_id)
text = str(run_id) + " - " + run_table.getName(run_id) + " - " + run_table.getDate(run_id) + " - "
text += run_table.getDesc(run_id)
text += " - " + str(SvCallTable(self.db_conn).num_calls(run_id, self.widgets.score_slider.value[0]))
text += " entries"
menu.append((text, str(run_id)))
Expand Down
3 changes: 3 additions & 0 deletions libs/msv/src/container/sv_db/svSchema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ void exportSoCDbWriter( libMS::SubmoduleOrganizer& xOrganizer )
.def( "filter_calls_with_high_score", &SvCallTable<DBConSingle>::filterCallsWithHighScore )
.def( "gen_indices", &SvCallTable<DBConSingle>::genIndices )
.def( "extract_small_calls", &SvCallTable<DBConSingle>::extractSmallCalls )
.def( "extract_large_calls", &SvCallTable<DBConSingle>::extractLargeCalls )
.def( "insert_call", &SvCallTable<DBConSingle>::insertCall );

py::class_<KMerFilterTable<DBConSingle>, std::shared_ptr<KMerFilterTable<DBConSingle>>>( xOrganizer.util( ),
Expand Down Expand Up @@ -183,6 +184,8 @@ void exportSoCDbWriter( libMS::SubmoduleOrganizer& xOrganizer )
"SvCallerRunTable" )
.def( py::init<std::shared_ptr<DBConSingle>>( ) )
.def( "getIds", &SvCallerRunTable<DBConSingle>::getIds )
.def( "getId", &SvCallerRunTable<DBConSingle>::getId )
.def( "hasName", &SvCallerRunTable<DBConSingle>::hasName )
.def( "getName", &SvCallerRunTable<DBConSingle>::getName )
.def( "exists", &SvCallerRunTable<DBConSingle>::exists )
.def( "getDesc", &SvCallerRunTable<DBConSingle>::getDesc )
Expand Down

0 comments on commit c7ab8e2

Please sign in to comment.