Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add concept sets search #2177

Closed
wants to merge 29 commits into from
Closed
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
d083082
Concept set search WIP: add API method
anton-abushkevich Oct 31, 2022
0b5e68f
Concept sets search - initial implementation
anton-abushkevich Nov 21, 2022
84ceab5
Concept sets search - solr core config
anton-abushkevich Nov 21, 2022
6e2dcb4
Concept sets search - add search availability check
anton-abushkevich Nov 22, 2022
6a49a78
Concept sets search - add search availability checks to reindex
anton-abushkevich Nov 22, 2022
5d6e747
Concept sets search - add locales to new buttons
TitrS Nov 22, 2022
b4c4dec
Concept sets search - change key of clear search
TitrS Nov 22, 2022
db83d54
Concept sets search - search availability check now returns boolean
anton-abushkevich Nov 22, 2022
e1511a0
Concept sets search - add solr conf file
anton-abushkevich Nov 23, 2022
6afec85
Concept sets search - add locales to configuration
TitrS Nov 23, 2022
50d8d9c
Merge branch 'add_concept_sets_search' of https://github.com/OHDSI/We…
TitrS Nov 23, 2022
6ae0594
Concept sets search - reindex and search small fixes, async reindex i…
anton-abushkevich Nov 25, 2022
cb61e56
reindex was moved to spring job
ssuvorov-fls Nov 28, 2022
7154d78
reindex was moved to spring job
ssuvorov-fls Nov 30, 2022
6de0796
added permissions for reindex status endpoint
ssuvorov-fls Nov 30, 2022
e209b33
Concept sets search - catch and ignore ConceptNotExistException on CS…
anton-abushkevich Dec 1, 2022
a0f8b64
return empty list of documents when source does not contain required …
ssuvorov-fls Dec 1, 2022
70e149b
return empty list of documents when source does not contain required …
ssuvorov-fls Dec 1, 2022
eaf8f25
return empty list of documents when source does not contain required …
ssuvorov-fls Dec 1, 2022
e563453
return empty list of documents when source does not contain required …
ssuvorov-fls Dec 1, 2022
246477b
return empty list of documents when source does not contain required …
ssuvorov-fls Dec 1, 2022
8e94727
return empty list of documents when source does not contain required …
ssuvorov-fls Dec 1, 2022
f90e10f
solr functionalty was moved to sparate location
ssuvorov-fls Dec 9, 2022
8008322
solr functionalty was moved to separate location
ssuvorov-fls Dec 13, 2022
31698e7
Concept sets search - build fix & add maxResults parameter
anton-abushkevich Dec 13, 2022
a2b19fc
cherry pick from solr_refactoring
ssuvorov-fls Feb 8, 2023
b8459e9
fixed after review
ssuvorov-fls Feb 8, 2023
cbbf407
Merge remote-tracking branch 'origin/master' into add_concept_sets_se…
ssuvorov-fls Feb 13, 2023
5e665d6
merge master into current branch
ssuvorov-fls Feb 13, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@
<solr.endpoint></solr.endpoint>
<solr.query.prefix>{!complexphrase inOrder=true}</solr.query.prefix>
<solr.version>8.11.2</solr.version>
<solr.conceptsets.core>webapi-conceptsets</solr.conceptsets.core>
<solr.conceptsets.maxResults>1000</solr.conceptsets.maxResults>
<!-- Heracles properties -->
<heracles.smallcellcount>5</heracles.smallcellcount>

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package org.ohdsi.webapi.cohortanalysis;

import org.ohdsi.webapi.info.ConfigurationInfo;
import org.ohdsi.webapi.extcommon.ConfigurationInfo;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,17 @@
import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.Set;

/**
*
* @author fdefalco
*/
public interface ConceptSetRepository extends CrudRepository<ConceptSet, Integer> {
ConceptSet findById(Integer conceptSetId);

@Query("select cs from ConceptSet cs where cs.id in (:conceptSetIds)")
List<ConceptSet> findAllById(@Param("conceptSetIds")final Set<Integer> conceptSetIds);

@Deprecated
@Query("SELECT cs FROM ConceptSet cs WHERE cs.name = :conceptSetName and cs.id <> :conceptSetId")
Expand All @@ -44,4 +48,6 @@ public interface ConceptSetRepository extends CrudRepository<ConceptSet, Integer

@Query("SELECT DISTINCT cs FROM ConceptSet cs JOIN FETCH cs.tags t WHERE lower(t.name) in :tagNames")
List<ConceptSet> findByTags(@Param("tagNames") List<String> tagNames);

List<ConceptSet> findAll();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
package org.ohdsi.webapi.conceptset.search;

import org.ohdsi.circe.vocabulary.ConceptSetExpression;
import org.ohdsi.webapi.Constants;
import org.ohdsi.webapi.conceptset.ConceptSet;
import org.ohdsi.webapi.job.JobExecutionResource;
import org.ohdsi.webapi.job.JobTemplate;
import org.ohdsi.webapi.service.ConceptSetService;
import org.ohdsi.webapi.service.VocabularyService;
import org.ohdsi.webapi.service.dto.ConceptSetReindexDTO;
import org.ohdsi.webapi.vocabulary.Concept;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.batch.core.ChunkListener;
import org.springframework.batch.core.ExitStatus;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.JobParametersBuilder;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.StepExecution;
import org.springframework.batch.core.StepExecutionListener;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.explore.JobExplorer;
import org.springframework.batch.core.scope.context.ChunkContext;
import org.springframework.batch.item.ItemReader;
import org.springframework.batch.item.ItemWriter;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;

import static org.ohdsi.webapi.Constants.Params.JOB_NAME;

@Service
public class ConceptSetReindexJobService {
protected final Logger log = LoggerFactory.getLogger(getClass());

@Autowired
private ConceptSetSearchService conceptSetSearchService;

@Autowired
private ConceptSetService conceptSetService;

@Autowired
private VocabularyService vocabService;

@Autowired
private StepBuilderFactory stepBuilderFactory;

@Autowired
private JobBuilderFactory jobBuilderFactory;

@Autowired
private JobTemplate jobTemplate;

@Autowired
private JobExplorer jobExplorer;

private static final String REINDEX_JOB_NAME = "concept sets reindex: %s";

private static final String REINDEX_TOTAL_DOCUMENTS = "REINDEX_TOTAL_DOCUMENTS";

private static final String REINDEX_PROCESSED_DOCUMENTS = "REINDEX_PROCESSED_DOCUMENTS";

public ConceptSetReindexDTO createIndex(String sourceKey) {
if (!conceptSetSearchService.isSearchAvailable()) {
return new ConceptSetReindexDTO(ConceptSetReindexStatus.UNAVAILABLE);
}
String jobName = String.format(REINDEX_JOB_NAME, sourceKey);
Optional<JobExecution> jobExecutionOptional = jobExplorer.findRunningJobExecutions(jobName).stream().findFirst();
if (jobExecutionOptional.isPresent()) {
return new ConceptSetReindexDTO(ConceptSetReindexStatus.RUNNING, jobExecutionOptional.get().getId());
} else {
JobExecutionResource newJobExecutionResource = createReindexJob(sourceKey);
return new ConceptSetReindexDTO(ConceptSetReindexStatus.CREATED, newJobExecutionResource.getExecutionId());
}
}

public ConceptSetReindexDTO getIndexStatus(String sourceKey, Long executionId) {
if (!conceptSetSearchService.isSearchAvailable()) {
return new ConceptSetReindexDTO(ConceptSetReindexStatus.UNAVAILABLE);
}
String jobName = String.format(REINDEX_JOB_NAME, sourceKey);
Optional<JobExecution> jobExecutionOptional = jobExplorer.findRunningJobExecutions(jobName).stream().findFirst();
JobExecution jobExecution = jobExecutionOptional.orElseGet(() -> jobExplorer.getJobExecution(executionId));

if (jobExecution != null) {
ConceptSetReindexDTO conceptSetReindexDTO;
if ("COMPLETED".equals(jobExecution.getStatus().name())) {
conceptSetReindexDTO = new ConceptSetReindexDTO(ConceptSetReindexStatus.COMPLETED);
} else if ("FAILED".equals(jobExecution.getStatus().name())) {
conceptSetReindexDTO = new ConceptSetReindexDTO(ConceptSetReindexStatus.FAILED);
} else {
conceptSetReindexDTO = new ConceptSetReindexDTO(ConceptSetReindexStatus.RUNNING);
}
conceptSetReindexDTO.setExecutionId(jobExecution.getId());
fillCounts(conceptSetReindexDTO, jobExecution);
return conceptSetReindexDTO;
} else {
return new ConceptSetReindexDTO(ConceptSetReindexStatus.UNAVAILABLE);
}
}

private JobExecutionResource createReindexJob(String sourceKey) {
String jobName = String.format(REINDEX_JOB_NAME, sourceKey);
final List<ConceptSet> conceptSets = conceptSetService.getConceptSetRepository().findAll();
conceptSetSearchService.clearConceptSetIndex();
Step step = stepBuilderFactory.get(jobName)
.<ConceptDocuments, ConceptDocuments>chunk(1)
.reader(new DocumentReader(sourceKey, conceptSets))
.writer(new DocumentWriter())
.listener(new JobStepExecutionListener())
.listener(new JobChunkListener())
.build();

JobParametersBuilder parametersBuilder = new JobParametersBuilder();
parametersBuilder.addString(JOB_NAME, String.format(REINDEX_JOB_NAME, sourceKey));
parametersBuilder.addString(Constants.Params.SOURCE_KEY, sourceKey);
parametersBuilder.addString(REINDEX_TOTAL_DOCUMENTS, String.valueOf(conceptSets.size()));

Job reindexJob = jobBuilderFactory.get(jobName)
.start(step)
.build();

return jobTemplate.launch(reindexJob, parametersBuilder.toJobParameters());
}

private void fillCounts(ConceptSetReindexDTO conceptSetReindexDTO, JobExecution jobExecution) {
try {
String jobTotalDocuments = jobExecution.getJobParameters().getString(REINDEX_TOTAL_DOCUMENTS);
if (jobTotalDocuments != null) {
conceptSetReindexDTO.setMaxCount(Integer.parseInt(jobTotalDocuments));
}
Object jobProcessedDocuments = jobExecution.getExecutionContext().get(REINDEX_PROCESSED_DOCUMENTS);
if (jobProcessedDocuments != null) {
conceptSetReindexDTO.setDoneCount((Integer) jobProcessedDocuments);
} else {
// If the job is still running we can get number of processed documents only from step parameters
jobExecution.getStepExecutions().stream()
.filter(se -> se.getStepName().equals(jobExecution.getJobParameters().getString(JOB_NAME)))
.findFirst()
.ifPresent(se -> {
Object stepProcessedDocuments = se.getExecutionContext().get(REINDEX_PROCESSED_DOCUMENTS);
if (stepProcessedDocuments != null) {
conceptSetReindexDTO.setDoneCount((Integer) stepProcessedDocuments);
}
});
}
} catch (Exception e) {
log.error("Failed to get count parameters for job with execution id = {}, {}", jobExecution.getId(), e);
}
}

private static class ConceptDocuments {
final Integer conceptSetId;

final List<ConceptSetSearchDocument> documents;

private ConceptDocuments() {
this.conceptSetId = null;
this.documents = Collections.emptyList();
}

private ConceptDocuments(Integer conceptSetId, List<ConceptSetSearchDocument> documents) {
this.conceptSetId = conceptSetId;
this.documents = documents;
}

public boolean hasDataToProcess() {
return conceptSetId != null && documents != null && !documents.isEmpty();
}
}

private class DocumentReader implements ItemReader<ConceptDocuments> {
private final Iterator<ConceptSet> iterator;

private final String sourceKey;

public DocumentReader(String sourceKey, List<ConceptSet> conceptSets) {
this.iterator = conceptSets.stream().iterator();
this.sourceKey = sourceKey;
}

@Override
public ConceptDocuments read() throws Exception {
if (iterator.hasNext()) {
ConceptSet conceptSet = iterator.next();
final ConceptSetExpression csExpression = conceptSetService.getConceptSetExpressionOrNull(conceptSet.getId());
if (csExpression == null) {
// data source does not contain required concepts, skip CS
return new ConceptDocuments();
}

final Collection<Concept> concepts = vocabService.executeMappedLookup(sourceKey, csExpression);

final List<ConceptSetSearchDocument> documents = concepts.stream().map(item -> {
final ConceptSetSearchDocument concept = new ConceptSetSearchDocument();
concept.setConceptSetId(conceptSet.getId());
concept.setConceptId(item.conceptId);
concept.setConceptName(item.conceptName);
concept.setConceptCode(item.conceptCode);
concept.setDomainName(item.domainId);
return concept;
}).collect(Collectors.toList());
return new ConceptDocuments(conceptSet.getId(), documents);
} else {
return null;
}
}
}

private class DocumentWriter implements ItemWriter<ConceptDocuments> {
@Override
public void write(List<? extends ConceptDocuments> list) throws Exception {
list.stream()
.filter(ConceptDocuments::hasDataToProcess)
.forEach(cd -> conceptSetSearchService.reindexConceptSet(cd.conceptSetId, cd.documents));
}
}

public class JobStepExecutionListener implements StepExecutionListener {
@Override
public void beforeStep(StepExecution stepExecution) {
}

@Override
public ExitStatus afterStep(StepExecution stepExecution) {
Object processedCount = stepExecution.getExecutionContext().get(REINDEX_PROCESSED_DOCUMENTS);
if (processedCount != null) {
if ((Integer) processedCount != 0) {
// Subtract 1 if the value is not equal to zero because "beforeChunk" method is called
// even if there's no element to process, so we get total number of processed documents plus one
stepExecution.getJobExecution().getExecutionContext()
.put(REINDEX_PROCESSED_DOCUMENTS, ((Integer) processedCount) - 1);
}
}
return stepExecution.getExitStatus();
}
}

public class JobChunkListener implements ChunkListener {
private int counter = 0;

@Override
public void beforeChunk(ChunkContext context) {
// Increment the number of processed documents before chunk because saving of step execution parameters
// is made before "afterChunk" is called
context.getStepContext().getStepExecution().getExecutionContext()
.put(REINDEX_PROCESSED_DOCUMENTS, ++counter);
}

@Override
public void afterChunk(ChunkContext context) {
// This method is called after saving of step parameters, so we can't use it
}

@Override
public void afterChunkError(ChunkContext context) {
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
package org.ohdsi.webapi.conceptset.search;

public enum ConceptSetReindexStatus {
UNAVAILABLE, CREATED, RUNNING, COMPLETED, FAILED
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package org.ohdsi.webapi.conceptset.search;

public class ConceptSetSearchDocument {
private int conceptSetId;
private long conceptId;
private String conceptName;
private String conceptCode;
private String domainName;

public int getConceptSetId() {
return conceptSetId;
}

public void setConceptSetId(final int conceptSetId) {
this.conceptSetId = conceptSetId;
}

public long getConceptId() {
return conceptId;
}

public void setConceptId(final long conceptId) {
this.conceptId = conceptId;
}

public String getConceptName() {
return conceptName;
}

public void setConceptName(final String conceptName) {
this.conceptName = conceptName;
}

public String getConceptCode() {
return conceptCode;
}

public void setConceptCode(final String conceptCode) {
this.conceptCode = conceptCode;
}

public String getDomainName() {
return domainName;
}

public void setDomainName(final String domainName) {
this.domainName = domainName;
}
}
Loading