Skip to content

Commit

Permalink
Merge pull request #345 from steineggerlab/foldseek_multiple_with_sin…
Browse files Browse the repository at this point in the history
…glechain

Foldseek multimersearch allows single chain
  • Loading branch information
Woosub-Kim committed Sep 11, 2024
2 parents 52029c0 + 9a2b7da commit 2256e21
Showing 1 changed file with 19 additions and 6 deletions.
25 changes: 19 additions & 6 deletions src/strucclustutils/scoremultimer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ class DBSCANCluster {
public:
DBSCANCluster(SearchResult &searchResult, std::set<cluster_t> &finalClusters, double minCov) : searchResult(searchResult), finalClusters(finalClusters) {
cLabel = 0;
minimumClusterSize = std::max(MULTIPLE_CHAINED_COMPLEX, (unsigned int) ((double) searchResult.qChainKeys.size() * minCov));
minimumClusterSize = (unsigned int) ((double) searchResult.qChainKeys.size() * minCov);
maximumClusterSize = std::min(searchResult.qChainKeys.size(), searchResult.dbChainKeys.size());
maximumClusterNum = searchResult.alnVec.size() / maximumClusterSize;
prevMaxClusterSize = 0;
Expand All @@ -191,6 +191,10 @@ class DBSCANCluster {
}

bool getAlnClusters() {
// if Query or Target is a Single Chain Complex.
if (std::min(searchResult.qChainKeys.size(), searchResult.dbChainKeys.size()) < MULTIPLE_CHAINED_COMPLEX)
return earlyStopForSingleChainComplex();

// rbh filter
filterAlnsByRBH();
fillDistMatrix();
Expand Down Expand Up @@ -225,6 +229,17 @@ class DBSCANCluster {
std::map<unsigned int, float> qBestTmScore;
std::map<unsigned int, float> dbBestTmScore;

// Shortcut taken by getAlnClusters() when the query or the target has fewer
// than MULTIPLE_CHAINED_COMPLEX chains, so the regular clustering is skipped.
//
// If minimumClusterSize is below MULTIPLE_CHAINED_COMPLEX, every alignment in
// searchResult.alnVec is inserted into finalClusters as its own one-element
// cluster. Otherwise nothing is inserted.
//
// Returns whatever finishDBSCAN() returns.
bool earlyStopForSingleChainComplex() {
    if (minimumClusterSize < MULTIPLE_CHAINED_COMPLEX) {
        unsigned int singleAlnIdx = 0;
        while (singleAlnIdx < searchResult.alnVec.size()) {
            // Reuse the member buffer so it ends up holding the last index,
            // exactly as it would after a plain index loop.
            neighbors = {singleAlnIdx};
            finalClusters.insert(neighbors);
            ++singleAlnIdx;
        }
    }
    return finishDBSCAN();
}

bool runDBSCAN() {
unsigned int neighborIdx;
unsigned int neighborAlnIdx;
Expand Down Expand Up @@ -362,6 +377,7 @@ class DBSCANCluster {
// Too few alns => do nothing and finish it
if (searchResult.alnVec.size() < minimumClusterSize)
return finishDBSCAN();
// All alns as a cluster
for (size_t alnIdx=0; alnIdx<searchResult.alnVec.size(); alnIdx++) {
neighbors.emplace_back(alnIdx);
}
Expand All @@ -371,7 +387,6 @@ class DBSCANCluster {
return runDBSCAN();
}
// Already good => finish it without clustering
prevMaxClusterSize = neighbors.size();
finalClusters.insert(neighbors);
return finishDBSCAN();
}
Expand Down Expand Up @@ -519,7 +534,7 @@ class ComplexScorer {
continue;
}
paredSearchResult.standardize();
if (!paredSearchResult.alnVec.empty() && currDbChainKeys.size() >= MULTIPLE_CHAINED_COMPLEX)
if (!paredSearchResult.alnVec.empty())
searchResults.emplace_back(paredSearchResult);

paredSearchResult.alnVec.clear();
Expand All @@ -531,7 +546,7 @@ class ComplexScorer {
}
currAlns.clear();
paredSearchResult.standardize();
if (!paredSearchResult.alnVec.empty() && currDbChainKeys.size() >= MULTIPLE_CHAINED_COMPLEX)
if (!paredSearchResult.alnVec.empty())
searchResults.emplace_back(paredSearchResult);

paredSearchResult.alnVec.clear();
Expand Down Expand Up @@ -714,8 +729,6 @@ int scoremultimer(int argc, const char **argv, const Command &command) {
for (size_t qCompIdx = 0; qCompIdx < qComplexIndices.size(); qCompIdx++) {
unsigned int qComplexId = qComplexIndices[qCompIdx];
std::vector<unsigned int> &qChainKeys = qComplexIdToChainKeysMap.at(qComplexId);
if (qChainKeys.size() < MULTIPLE_CHAINED_COMPLEX)
continue;
complexScorer.getSearchResults(qComplexId, qChainKeys, dbChainKeyToComplexIdMap, dbComplexIdToChainKeysMap, searchResults);
// for each db complex
for (size_t dbId = 0; dbId < searchResults.size(); dbId++) {
Expand Down

0 comments on commit 2256e21

Please sign in to comment.