-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwrite_table.cpp
95 lines (78 loc) · 3.01 KB
/
write_table.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
// MUMU
// Copyright (C) 2020-2024 Frederic Mahe
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// Contact: Frederic Mahe <frederic.mahe@cirad.fr>,
// UMR PHIM, CIRAD - TA A-120/K
// Campus International de Baillarguet
// 34398 MONTPELLIER CEDEX 5
// France
#include <algorithm>
#include <fstream>
#include <functional>
#include <ios>
#include <iostream>
#include <string>
#include <vector>
#include <unordered_map>
#include <tuple>
#include "mumu.h"
// Sortable summary of a single OTU: its identifier and the two numeric
// keys (abundance and spread) used to rank it.
struct OTU_stats {
  std::string OTU_id;  // should be a string_view
  long int spread {0};  // refactor; type is not correct
  unsigned long int abundance {0};

  // Three-way comparison, decided by the first key that differs:
  //   1. abundance,
  //   2. spread,
  //   3. OTU_id, with the operands swapped on purpose: when instances
  //      are sorted in decreasing order, ties on abundance and spread
  //      end up in ascending lexicographic ID order
  //      (A, B, ..., a, b, c, ...)
  auto operator<=>(OTU_stats const& rhs) const {
    if (auto const by_abundance = abundance <=> rhs.abundance;
        by_abundance != 0) {
      return by_abundance;
    }
    if (auto const by_spread = spread <=> rhs.spread;
        by_spread != 0) {
      return by_spread;
    }
    return rhs.OTU_id <=> OTU_id;  // reversed: see note above
  }

  // Member-wise equality: OTU_id, spread and abundance must all match.
  auto operator==(OTU_stats const& rhs) const -> bool = default;
};
[[nodiscard]]
auto extract_OTU_stats (std::unordered_map<std::string, struct OTU> &OTUs)
-> std::vector<struct OTU_stats> {
// goal is to get a sortable list of OTUs
std::vector<struct OTU_stats> sorted_OTUs;
sorted_OTUs.reserve(OTUs.size()); // probably 25-50% too much
for (auto const& otu: OTUs) { // replace with copy_if()?
const std::string& OTU_id {otu.first};
if (OTUs[OTU_id].is_merged) { continue; } // skip merged OTUs
sorted_OTUs.push_back(OTU_stats {
.OTU_id = OTU_id,
.spread = OTUs[OTU_id].spread,
.abundance = OTUs[OTU_id].sum_reads}
);
}
// sort by decreasing abundance, spread and id name
std::ranges::sort(sorted_OTUs, std::ranges::greater{});
sorted_OTUs.shrink_to_fit(); // reduces memory usage
return sorted_OTUs;
}
// Append the remaining (non-merged) OTUs to the output table.
//
// OTUs: map from OTU identifier to OTU record.
// new_otu_table_name: path of the output file; it is opened in append
//                     mode, so content already in the file is kept.
//
// One line is written per OTU, in the order returned by
// extract_OTU_stats(): the identifier, then every element of the OTU's
// 'samples', each preceded by 'sepchar', then a newline.
//
// Progress is reported on stdout. If the file cannot be opened, or if
// the stream is in a failed state once everything has been flushed, an
// error is reported on stderr and the "done" message is not printed.
auto write_table (std::unordered_map<std::string, struct OTU> &OTUs,
                  const std::string &new_otu_table_name) -> void {
  std::cout << "write new OTU table... ";

  // re-open output file
  std::ofstream new_otu_table {new_otu_table_name, std::ios_base::app};
  if (!new_otu_table.is_open()) {
    // without this check every write below would be silently dropped
    std::cerr << "\nError: cannot open output file "
              << new_otu_table_name << '\n';
    return;
  }

  // list and sort remaining OTUs
  const auto sorted_OTUs {extract_OTU_stats(OTUs)};

  // output
  for (auto const& otu: sorted_OTUs) {
    new_otu_table << otu.OTU_id;
    // at() instead of operator[]: the identifiers were taken from OTUs,
    // so the lookup is expected to succeed, and at() never inserts
    for (auto const& sample: OTUs.at(otu.OTU_id).samples) {  // C++23 refactoring: std::views::join_with('\t');
      new_otu_table << sepchar << sample;
    }
    new_otu_table << '\n';
  }

  // push buffered data out now, so that a write error is visible here
  new_otu_table.flush();
  if (new_otu_table.fail()) {
    std::cerr << "\nError: cannot write to output file "
              << new_otu_table_name << '\n';
    return;
  }

  std::cout << "done, " << sorted_OTUs.size() << " entries\n";
}