Skip to content

Commit

Permalink
Writing data source attribute
Browse files: browse the repository at this point in the history
  • Loading branch information
mhekkel committed Jan 14, 2025
1 parent 565f28f commit f3c89bb
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 37 deletions.
3 changes: 2 additions & 1 deletion changelog
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
Version 2.2.0
- Added support for
- Added an option to write the source of the data into the JSON file
- Updated node modules

Version 2.1.1
- Fix a bug that caused too many residues to be placed where
Expand Down
59 changes: 35 additions & 24 deletions src/alphafill.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ std::tuple<UniqueType, std::string> isUniqueLigand(const cif::mm::structure &str
result = { UniqueType::MoreAtoms, np.get_asym_id() };
else
result = { UniqueType::Seen, np.get_asym_id() };

break;
}
}
Expand All @@ -158,7 +158,8 @@ std::tuple<UniqueType, std::string> isUniqueLigand(const cif::mm::structure &str

for (auto &a : lig.atoms())
atoms_a.emplace_back(a.get_label_atom_id(), a.get_location());
sort(atoms_a.begin(), atoms_a.end(), [](auto &a, auto &b) { return std::get<0>(a) < std::get<0>(b); });
sort(atoms_a.begin(), atoms_a.end(), [](auto &a, auto &b)
{ return std::get<0>(a) < std::get<0>(b); });

for (auto &np : structure.non_polymers())
{
Expand All @@ -169,7 +170,8 @@ std::tuple<UniqueType, std::string> isUniqueLigand(const cif::mm::structure &str

for (auto &a : np.atoms())
atoms_b.emplace_back(a.get_label_atom_id(), a.get_location());
sort(atoms_b.begin(), atoms_b.end(), [](auto &a, auto &b) { return std::get<0>(a) < std::get<0>(b); });
sort(atoms_b.begin(), atoms_b.end(), [](auto &a, auto &b)
{ return std::get<0>(a) < std::get<0>(b); });

std::vector<point> pa, pb;

Expand Down Expand Up @@ -425,7 +427,7 @@ int create_index(int argc, char *const argv[])
fs::path file = fiter->path();

std::string name = file.filename().string();
if (not (cif::ends_with(name, "_final.cif") or cif::ends_with(name, "_final.cif.gz")))
if (not(cif::ends_with(name, "_final.cif") or cif::ends_with(name, "_final.cif.gz")))
continue;

q1.push(file);
Expand Down Expand Up @@ -490,7 +492,8 @@ void check_blast_index()

// --------------------------------------------------------------------

zeep::json::element alphafill(cif::datablock &db, const std::vector<PAE_matrix> &v_pae, alphafill_progress_cb &&progress)
zeep::json::element alphafill(cif::datablock &db, const std::string &source,
const std::vector<PAE_matrix> &v_pae, alphafill_progress_cb &&progress)
{
using namespace std::literals;
using namespace cif::literals;
Expand Down Expand Up @@ -540,7 +543,8 @@ zeep::json::element alphafill(cif::datablock &db, const std::vector<PAE_matrix>
json result = {
{ "id", afID },
{ "date", ss.str() },
{ "alphafill_version", kVersionNumber }
{ "alphafill_version", kVersionNumber },
{ "source", source }
};

json &hits = result["hits"] = json::array();
Expand Down Expand Up @@ -573,7 +577,7 @@ zeep::json::element alphafill(cif::datablock &db, const std::vector<PAE_matrix>
auto j = seq.find(')', i + 1);
if (j == std::string::npos or j > i + 2)
throw std::runtime_error("Invalid sequence");

seq.erase(i, j - i + 1);
i = seq.find('(', i + 1);
}
Expand Down Expand Up @@ -810,14 +814,10 @@ zeep::json::element alphafill(cif::datablock &db, const std::vector<PAE_matrix>
json r_hsp{
{ "pdb_id", pdb_id },
{ "pdb_asym_id", pdb_res.front()->get_asym_id() },
{
"alignment", {
{ "af_start", hsp.mQueryStart },
{ "identity", hsp.identity() },
{ "length", hsp.length() },
{ "pdb_start", hsp.mTargetStart }
}
},
{ "alignment", { { "af_start", hsp.mQueryStart },
{ "identity", hsp.identity() },
{ "length", hsp.length() },
{ "pdb_start", hsp.mTargetStart } } },
{ "global_rmsd", rmsd }
};

Expand Down Expand Up @@ -876,29 +876,28 @@ zeep::json::element alphafill(cif::datablock &db, const std::vector<PAE_matrix>
auto &rep_res = af_structure.get_residue(replace_id);
if (cif::VERBOSE > 0)
std::cerr << "Residue " << res << " has more atoms than the first transplant " << rep_res << '\n';

try
{
af_structure.remove_residue(rep_res);

for (auto &hit : hits)
{
auto ti = std::find_if(hit["transplants"].begin(), hit["transplants"].end(), [id=replace_id](json &e) {
return e["asym_id"] == id;
});
auto ti = std::find_if(hit["transplants"].begin(), hit["transplants"].end(), [id = replace_id](json &e)
{ return e["asym_id"] == id; });
if (ti != hit["transplants"].end())
{
hit["transplants"].erase(ti);
break;
}
}
}
catch(const std::exception& e)
catch (const std::exception &e)
{
if (cif::VERBOSE > 0)
std::cerr << "Failed to remove residue with asym ID " << replace_id << ": " << e.what() << '\n';
}

break;
}

Expand Down Expand Up @@ -1110,11 +1109,15 @@ zeep::json::element alphafill(cif::datablock &db, const std::vector<PAE_matrix>
af_structure.cleanup_empty_categories();

auto &software = af_structure.get_category("software");
software.emplace({ { "pdbx_ordinal", software.size() + 1 }, // TODO: should we check this ordinal number???
software.emplace({
//
{ "pdbx_ordinal", software.size() + 1 }, // TODO: should we check this ordinal number???
{ "name", "alphafill" },
{ "version", kVersionNumber },
{ "date", kRevisionDate },
{ "classification", "model annotation" } });
{ "classification", "model annotation" }
//
});

return result;
}
Expand Down Expand Up @@ -1179,6 +1182,8 @@ int alphafill_main(int argc, char *const argv[])
mcfp::make_hidden_option<int>("blast-gap-open", 11, "Blast penalty for gap open"),
mcfp::make_hidden_option<int>("blast-gap-extend", 1, "Blast penalty for gap extend"),

mcfp::make_option("data-source", "user", "Data source for input model"),

mcfp::make_option<size_t>("threads,t", std::thread::hardware_concurrency(), "Number of threads to use, zero means all available cores"),

mcfp::make_hidden_option<std::string>("custom-dir", (fs::temp_directory_path() / "alphafill").string(), "Directory for custom built entries")
Expand Down Expand Up @@ -1220,6 +1225,12 @@ int alphafill_main(int argc, char *const argv[])
return 1;
}

if (config.get("data-source") != "AFDB" and config.get("data-source") != "BFVD" and config.get("data-source") != "user")
{
std::cerr << "Invalid data-source, allowed values are 'AFDB', 'BFVD' and 'user'\n";
return 1;
}

fs::path paein;

if (config.has("pae-file"))
Expand All @@ -1244,7 +1255,7 @@ int alphafill_main(int argc, char *const argv[])
if (fs::exists(paein))
v_pae = load_pae_from_file(paein);

json metadata = alphafill(f.front(), v_pae, my_progress{});
json metadata = alphafill(f.front(), config.get("data-source"), v_pae, my_progress{});

if (config.operands().size() == 2)
{
Expand Down
3 changes: 2 additions & 1 deletion src/alphafill.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ struct alphafill_progress_cb

using PAE_matrix = cif::matrix<uint8_t>;

zeep::json::element alphafill(cif::datablock &db, const std::vector<PAE_matrix> &pae, alphafill_progress_cb &&progress);
// Run alphafill on the datablock db and return the JSON metadata for the run.
//
// db        datablock containing the input model
// source    label for the origin of the input model; it is stored verbatim
//           under the "source" key of the returned metadata. The command line
//           front end only accepts 'AFDB', 'BFVD' and 'user', the data
//           service passes "user". This function itself does not appear to
//           validate the value -- TODO confirm.
// pae       PAE matrices for the model; presumably empty when no PAE file is
//           available -- verify against callers.
// progress  callback object used to report progress
//
// The returned object contains at least the keys "id", "date",
// "alphafill_version", "source" and "hits".
zeep::json::element alphafill(cif::datablock &db, const std::string &source,
const std::vector<PAE_matrix> &pae, alphafill_progress_cb &&progress);
int alphafill_main(int argc, char *const argv[]);

int create_index(int argc, char *const argv[]);
Expand Down
2 changes: 1 addition & 1 deletion src/data-service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -632,7 +632,7 @@ void data_service::process_queued(const std::filesystem::path &xyzin, const std:
fs::rename(paein, m_work_dir / paein.filename(), ec);
}

auto metadata = alphafill(f.front(), pae_data, data_service_progress{ m_progress });
auto metadata = alphafill(f.front(), "user", pae_data, data_service_progress{ m_progress });

try
{
Expand Down
19 changes: 9 additions & 10 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include "main.hpp"
#include "alphafill.hpp"
#include "config.hpp"
#include "main.hpp"
#include "revision.hpp"
#include "validate.hpp"

Expand All @@ -38,9 +38,9 @@
#include <thread>

#if defined(BUILD_WEB_APPLICATION)
#include "data-service.hpp"
#include "db-connection.hpp"
#include "server.hpp"
# include "data-service.hpp"
# include "db-connection.hpp"
# include "server.hpp"
#endif

namespace fs = std::filesystem;
Expand Down Expand Up @@ -76,8 +76,7 @@ int rebuild_db_main(int argc, char *const argv[])

mcfp::make_option<size_t>("threads,t", std::thread::hardware_concurrency(), "Number of threads to use, zero means all available cores"),

mcfp::make_hidden_option<std::string>("custom-dir", (fs::temp_directory_path() / "alphafill").string(), "Directory for custom built entries")
);
mcfp::make_hidden_option<std::string>("custom-dir", (fs::temp_directory_path() / "alphafill").string(), "Directory for custom built entries"));

parse_argv(argc, argv, config);

Expand Down Expand Up @@ -199,19 +198,19 @@ int main(int argc, char *const argv[])
else
{
const std::string usage =
R"(usage: alphafill command [options]
R"(usage: alphafill command [options]
where command is one of
create-index Create a FastA file based on data in the PDB files
(A FastA file is required to process files)
process Process an AlphaFill structure)"
#if defined(BUILD_WEB_APPLICATION)
R"(
R"(
rebuild-db Rebuild the databank
server Start a web server instance)"
#endif
R"(
R"(
The following options are always recognized:
)";
Expand All @@ -235,7 +234,7 @@ The following options are always recognized:

if (config.operands().empty())
std::cerr << "Missing command"
<< "\n\n";
<< "\n\n";

std::cerr << config << '\n';
return config.has("help") ? 0 : 1;
Expand Down

0 comments on commit f3c89bb

Please sign in to comment.