From d99d79c15463abfe260d5f402493355248262222 Mon Sep 17 00:00:00 2001 From: Woosub-Kim Date: Tue, 10 Sep 2024 22:52:31 +0900 Subject: [PATCH 1/2] single chain allowing multimersearch --- src/strucclustutils/scoremultimer.cpp | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/strucclustutils/scoremultimer.cpp b/src/strucclustutils/scoremultimer.cpp index 9679013e..03946144 100644 --- a/src/strucclustutils/scoremultimer.cpp +++ b/src/strucclustutils/scoremultimer.cpp @@ -181,7 +181,7 @@ class DBSCANCluster { public: DBSCANCluster(SearchResult &searchResult, std::set &finalClusters, double minCov) : searchResult(searchResult), finalClusters(finalClusters) { cLabel = 0; - minimumClusterSize = std::max(MULTIPLE_CHAINED_COMPLEX, (unsigned int) ((double) searchResult.qChainKeys.size() * minCov)); + minimumClusterSize = (unsigned int) ((double) searchResult.qChainKeys.size() * minCov); maximumClusterSize = std::min(searchResult.qChainKeys.size(), searchResult.dbChainKeys.size()); maximumClusterNum = searchResult.alnVec.size() / maximumClusterSize; prevMaxClusterSize = 0; @@ -191,6 +191,9 @@ class DBSCANCluster { } bool getAlnClusters() { + if (searchResult.qChainKeys.size() < MULTIPLE_CHAINED_COMPLEX || searchResult.dbChainKeys.size() < MULTIPLE_CHAINED_COMPLEX) + return earlyStopForSingleChainComplex(); + // rbh filter filterAlnsByRBH(); fillDistMatrix(); @@ -225,6 +228,16 @@ class DBSCANCluster { std::map qBestTmScore; std::map dbBestTmScore; + bool earlyStopForSingleChainComplex() { + if (minimumClusterSize >= MULTIPLE_CHAINED_COMPLEX) + return finishDBSCAN(); + + for (size_t alnIdx = 0; alnIdx < searchResult.alnVec.size(); alnIdx++ ) { + finalClusters.insert(cluster_t(alnIdx)); + } + return finishDBSCAN(); + } + bool runDBSCAN() { unsigned int neighborIdx; unsigned int neighborAlnIdx; @@ -519,7 +532,7 @@ class ComplexScorer { continue; } paredSearchResult.standardize(); - if (!paredSearchResult.alnVec.empty() && currDbChainKeys.size() >= MULTIPLE_CHAINED_COMPLEX) + if (!paredSearchResult.alnVec.empty()) searchResults.emplace_back(paredSearchResult); paredSearchResult.alnVec.clear(); @@ -531,7 +544,7 @@ class ComplexScorer { } currAlns.clear(); paredSearchResult.standardize(); - if (!paredSearchResult.alnVec.empty() && currDbChainKeys.size() >= MULTIPLE_CHAINED_COMPLEX) + if (!paredSearchResult.alnVec.empty()) searchResults.emplace_back(paredSearchResult); paredSearchResult.alnVec.clear(); @@ -714,8 +727,6 @@ int scoremultimer(int argc, const char **argv, const Command &command) { for (size_t qCompIdx = 0; qCompIdx < qComplexIndices.size(); qCompIdx++) { unsigned int qComplexId = qComplexIndices[qCompIdx]; std::vector &qChainKeys = qComplexIdToChainKeysMap.at(qComplexId); - if (qChainKeys.size() < MULTIPLE_CHAINED_COMPLEX) - continue; complexScorer.getSearchResults(qComplexId, qChainKeys, dbChainKeyToComplexIdMap, dbComplexIdToChainKeysMap, searchResults); // for each db complex for (size_t dbId = 0; dbId < searchResults.size(); dbId++) { From 9a2b7da48eef75c56104293e68a754ac78a10cbb Mon Sep 17 00:00:00 2001 From: Woosub-Kim Date: Wed, 11 Sep 2024 03:04:56 +0900 Subject: [PATCH 2/2] bug fixed: single elemented vector for single chain alignment --- src/strucclustutils/scoremultimer.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/strucclustutils/scoremultimer.cpp b/src/strucclustutils/scoremultimer.cpp index 03946144..7e9e514d 100644 --- a/src/strucclustutils/scoremultimer.cpp +++ b/src/strucclustutils/scoremultimer.cpp @@ -191,7 +191,8 @@ class DBSCANCluster { } bool getAlnClusters() { - if (searchResult.qChainKeys.size() < MULTIPLE_CHAINED_COMPLEX || searchResult.dbChainKeys.size() < MULTIPLE_CHAINED_COMPLEX) + // if Query or Target is a Single Chain Complex. + if (std::min(searchResult.qChainKeys.size(), searchResult.dbChainKeys.size()) < MULTIPLE_CHAINED_COMPLEX) return earlyStopForSingleChainComplex(); // rbh filter @@ -232,8 +233,9 @@ class DBSCANCluster { if (minimumClusterSize >= MULTIPLE_CHAINED_COMPLEX) return finishDBSCAN(); - for (size_t alnIdx = 0; alnIdx < searchResult.alnVec.size(); alnIdx++ ) { - finalClusters.insert(cluster_t(alnIdx)); + for (unsigned int alnIdx = 0; alnIdx < searchResult.alnVec.size(); alnIdx++ ) { + neighbors = {alnIdx}; + finalClusters.insert(neighbors); } return finishDBSCAN(); } @@ -375,6 +377,7 @@ class DBSCANCluster { // Too few alns => do nothing and finish it if (searchResult.alnVec.size() < minimumClusterSize) return finishDBSCAN(); + // All alns as a cluster for (size_t alnIdx=0; alnIdx finish it without clustering - prevMaxClusterSize = neighbors.size(); finalClusters.insert(neighbors); return finishDBSCAN(); }