From d0830828323e5a266dfe32052d2a262f2ea252e5 Mon Sep 17 00:00:00 2001 From: Anton Abushkevich Date: Mon, 31 Oct 2022 17:34:50 +0300 Subject: [PATCH 01/27] Concept set search WIP: add API method --- .../webapi/service/ConceptSetService.java | 20 +++++++++++++++++ .../service/dto/ConceptSetSearchDTO.java | 22 +++++++++++++++++++ ...000__add_concept_set_search_permission.sql | 7 ++++++ 3 files changed, 49 insertions(+) create mode 100644 src/main/java/org/ohdsi/webapi/service/dto/ConceptSetSearchDTO.java create mode 100644 src/main/resources/db/migration/postgresql/V2.13.0.20221031170000__add_concept_set_search_permission.sql diff --git a/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java b/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java index 1186a22445..a9cdf00f49 100644 --- a/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java +++ b/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java @@ -37,6 +37,7 @@ import org.ohdsi.webapi.exception.ConceptNotExistException; import org.ohdsi.webapi.security.PermissionService; import org.ohdsi.webapi.service.dto.ConceptSetDTO; +import org.ohdsi.webapi.service.dto.ConceptSetSearchDTO; import org.ohdsi.webapi.shiro.Entities.UserEntity; import org.ohdsi.webapi.shiro.Entities.UserRepository; import org.ohdsi.webapi.shiro.management.Security; @@ -49,6 +50,7 @@ import org.ohdsi.webapi.util.ExportUtil; import org.ohdsi.webapi.util.NameUtils; import org.ohdsi.webapi.util.ExceptionUtils; +import org.ohdsi.webapi.util.PreparedStatementRenderer; import org.ohdsi.webapi.versioning.domain.ConceptSetVersion; import org.ohdsi.webapi.versioning.domain.Version; import org.ohdsi.webapi.versioning.domain.VersionBase; @@ -826,6 +828,24 @@ public List listByTags(TagNameListRequestDTO requestDTO) { return listByTags(entities, names, ConceptSetDTO.class); } + + /** + * Search for concept sets. 
+ * + * @summary Search for a concept bt search string and domains (search among containing concepts) + * @param sourceKey The source key + * @param search The ConceptSetSearchDTO + * @return A collection of concept sets + */ + @Path("{sourceKey}/search") + @POST + @Produces(MediaType.APPLICATION_JSON) + @Consumes(MediaType.APPLICATION_JSON) + public Collection executeSearch(@PathParam("sourceKey") String sourceKey, ConceptSetSearchDTO search) { + /* todo implement search */ + return getConceptSets(); + } + private void checkVersion(int id, int version) { checkVersion(id, version, true); } diff --git a/src/main/java/org/ohdsi/webapi/service/dto/ConceptSetSearchDTO.java b/src/main/java/org/ohdsi/webapi/service/dto/ConceptSetSearchDTO.java new file mode 100644 index 0000000000..8e9a2607f6 --- /dev/null +++ b/src/main/java/org/ohdsi/webapi/service/dto/ConceptSetSearchDTO.java @@ -0,0 +1,22 @@ +package org.ohdsi.webapi.service.dto; + +public class ConceptSetSearchDTO { + private String query; + private String[] domainId; + + public String getQuery() { + return query; + } + + public void setQuery(final String query) { + this.query = query; + } + + public String[] getDomainId() { + return domainId; + } + + public void setDomainId(final String[] domainId) { + this.domainId = domainId; + } +} diff --git a/src/main/resources/db/migration/postgresql/V2.13.0.20221031170000__add_concept_set_search_permission.sql b/src/main/resources/db/migration/postgresql/V2.13.0.20221031170000__add_concept_set_search_permission.sql new file mode 100644 index 0000000000..a0b704cf0c --- /dev/null +++ b/src/main/resources/db/migration/postgresql/V2.13.0.20221031170000__add_concept_set_search_permission.sql @@ -0,0 +1,7 @@ +INSERT INTO ${ohdsiSchema}.sec_permission(id, value, description) VALUES + (nextval('${ohdsiSchema}.sec_permission_id_seq'), 'conceptset:*:search', 'Concept sets search permission'); + +INSERT INTO ${ohdsiSchema}.sec_role_permission(id, role_id, permission_id) +SELECT 
nextval('${ohdsiSchema}.sec_role_permission_sequence'), sr.id, sp.id +FROM ${ohdsiSchema}.sec_permission SP, ${ohdsiSchema}.sec_role sr +WHERE sp.value = 'conceptset:*:search' AND sr.name IN ('Atlas users'); \ No newline at end of file From 0b5e68f6a57f7622894df55383e21749534e53a8 Mon Sep 17 00:00:00 2001 From: Anton Abushkevich Date: Tue, 22 Nov 2022 01:51:52 +0300 Subject: [PATCH 02/27] Concept sets search - initial implementation --- pom.xml | 1 + .../conceptset/ConceptSetRepository.java | 4 + .../search/ConceptSetSearchDocument.java | 49 ++++++++ .../search/ConceptSetSearchService.java | 119 ++++++++++++++++++ .../webapi/service/ConceptSetService.java | 105 +++++++++++++--- src/main/resources/application.properties | 1 + ...000__add_concept_set_search_permission.sql | 9 +- .../conf/dataimport.properties | 0 .../conf/lang/contractions_ca.txt | 0 .../conf/lang/contractions_fr.txt | 0 .../conf/lang/contractions_ga.txt | 0 .../conf/lang/contractions_it.txt | 0 .../conf/lang/hyphenations_ga.txt | 0 .../conf/lang/stemdict_nl.txt | 0 .../conf/lang/stoptags_ja.txt | 0 .../conf/lang/stopwords_ar.txt | 0 .../conf/lang/stopwords_bg.txt | 0 .../conf/lang/stopwords_ca.txt | 0 .../conf/lang/stopwords_cz.txt | 0 .../conf/lang/stopwords_da.txt | 0 .../conf/lang/stopwords_de.txt | 0 .../conf/lang/stopwords_el.txt | 0 .../conf/lang/stopwords_en.txt | 0 .../conf/lang/stopwords_es.txt | 0 .../conf/lang/stopwords_eu.txt | 0 .../conf/lang/stopwords_fa.txt | 0 .../conf/lang/stopwords_fi.txt | 0 .../conf/lang/stopwords_fr.txt | 0 .../conf/lang/stopwords_ga.txt | 0 .../conf/lang/stopwords_gl.txt | 0 .../conf/lang/stopwords_hi.txt | 0 .../conf/lang/stopwords_hu.txt | 0 .../conf/lang/stopwords_hy.txt | 0 .../conf/lang/stopwords_id.txt | 0 .../conf/lang/stopwords_it.txt | 0 .../conf/lang/stopwords_ja.txt | 0 .../conf/lang/stopwords_lv.txt | 0 .../conf/lang/stopwords_nl.txt | 0 .../conf/lang/stopwords_no.txt | 0 .../conf/lang/stopwords_pt.txt | 0 .../conf/lang/stopwords_ro.txt | 0 
.../conf/lang/stopwords_ru.txt | 0 .../conf/lang/stopwords_sv.txt | 0 .../conf/lang/stopwords_th.txt | 0 .../conf/lang/stopwords_tr.txt | 0 .../conf/lang/userdict_ja.txt | 0 .../solr/{ => vocabulary}/conf/managed-schema | 0 .../solr/{ => vocabulary}/conf/params.json | 0 .../solr/{ => vocabulary}/conf/protwords.txt | 0 .../solr/{ => vocabulary}/conf/solrconfig.xml | 0 .../solr/{ => vocabulary}/conf/stopwords.txt | 0 .../solr/{ => vocabulary}/conf/synonyms.txt | 0 .../solr/{ => vocabulary}/core.properties | 0 .../solr/{ => vocabulary}/data-config.xml | 0 54 files changed, 270 insertions(+), 18 deletions(-) create mode 100644 src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchDocument.java create mode 100644 src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java rename src/main/resources/solr/{ => vocabulary}/conf/dataimport.properties (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/contractions_ca.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/contractions_fr.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/contractions_ga.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/contractions_it.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/hyphenations_ga.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stemdict_nl.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stoptags_ja.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_ar.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_bg.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_ca.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_cz.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_da.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_de.txt (100%) rename 
src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_el.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_en.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_es.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_eu.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_fa.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_fi.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_fr.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_ga.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_gl.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_hi.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_hu.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_hy.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_id.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_it.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_ja.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_lv.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_nl.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_no.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_pt.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_ro.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_ru.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_sv.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_th.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/lang/stopwords_tr.txt (100%) rename src/main/resources/solr/{ => 
vocabulary}/conf/lang/userdict_ja.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/managed-schema (100%) rename src/main/resources/solr/{ => vocabulary}/conf/params.json (100%) rename src/main/resources/solr/{ => vocabulary}/conf/protwords.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/solrconfig.xml (100%) rename src/main/resources/solr/{ => vocabulary}/conf/stopwords.txt (100%) rename src/main/resources/solr/{ => vocabulary}/conf/synonyms.txt (100%) rename src/main/resources/solr/{ => vocabulary}/core.properties (100%) rename src/main/resources/solr/{ => vocabulary}/data-config.xml (100%) diff --git a/pom.xml b/pom.xml index d65ab3f7e1..78759b9e3b 100644 --- a/pom.xml +++ b/pom.xml @@ -70,6 +70,7 @@ {!complexphrase inOrder=true} 8.11.2 + webapi-conceptsets 5 diff --git a/src/main/java/org/ohdsi/webapi/conceptset/ConceptSetRepository.java b/src/main/java/org/ohdsi/webapi/conceptset/ConceptSetRepository.java index bdd42445d1..6a66d3c919 100644 --- a/src/main/java/org/ohdsi/webapi/conceptset/ConceptSetRepository.java +++ b/src/main/java/org/ohdsi/webapi/conceptset/ConceptSetRepository.java @@ -22,6 +22,7 @@ import java.util.Collection; import java.util.List; import java.util.Optional; +import java.util.Set; /** * @@ -29,6 +30,9 @@ */ public interface ConceptSetRepository extends CrudRepository { ConceptSet findById(Integer conceptSetId); + + @Query("select cs from ConceptSet cs where cs.id in (:conceptSetIds)") + List findAllById(@Param("conceptSetIds")final Set conceptSetIds); @Deprecated @Query("SELECT cs FROM ConceptSet cs WHERE cs.name = :conceptSetName and cs.id <> :conceptSetId") diff --git a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchDocument.java b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchDocument.java new file mode 100644 index 0000000000..8be5d59d46 --- /dev/null +++ b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchDocument.java @@ -0,0 +1,49 @@ +package 
org.ohdsi.webapi.conceptset.search; + +public class ConceptSetSearchDocument { + private int conceptSetId; + private long conceptId; + private String conceptName; + private String conceptCode; + private String domainName; + + public int getConceptSetId() { + return conceptSetId; + } + + public void setConceptSetId(final int conceptSetId) { + this.conceptSetId = conceptSetId; + } + + public long getConceptId() { + return conceptId; + } + + public void setConceptId(final long conceptId) { + this.conceptId = conceptId; + } + + public String getConceptName() { + return conceptName; + } + + public void setConceptName(final String conceptName) { + this.conceptName = conceptName; + } + + public String getConceptCode() { + return conceptCode; + } + + public void setConceptCode(final String conceptCode) { + this.conceptCode = conceptCode; + } + + public String getDomainName() { + return domainName; + } + + public void setDomainName(final String domainName) { + this.domainName = domainName; + } +} diff --git a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java new file mode 100644 index 0000000000..97ab772956 --- /dev/null +++ b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java @@ -0,0 +1,119 @@ +package org.ohdsi.webapi.conceptset.search; + +import org.apache.commons.beanutils.ConvertUtils; +import org.apache.commons.lang3.math.NumberUtils; +import org.apache.solr.client.solrj.SolrClient; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.common.SolrInputDocument; +import org.ohdsi.webapi.conceptset.ConceptSet; +import org.ohdsi.webapi.service.dto.ConceptSetSearchDTO; +import org.ohdsi.webapi.vocabulary.SolrSearchClient; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import 
org.springframework.stereotype.Service; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; + +@Service +public class ConceptSetSearchService { + protected final Logger log = LoggerFactory.getLogger(getClass()); + + @Value("${solr.conceptsets.core}") + private String conceptSetsCore; + + @Autowired + private SolrSearchClient solrSearchClient; + + public Set searchConceptSets(final ConceptSetSearchDTO dto) { + final SolrClient solrClient = solrSearchClient.getSolrClient(conceptSetsCore); + + final Set results = new HashSet<>(); + + try { + final SolrQuery q = new SolrQuery(); + q.setQuery(composeSearchQuery(dto)); + q.add("group", "true"); + q.add("group.field", "concept_set_id"); + + solrClient.query(q).getGroupResponse().getValues().forEach(gr -> { + gr.getValues().forEach(g -> { + results.add(NumberUtils.createInteger(ConvertUtils.convert(g.getGroupValue()))); + }); + }); + + } catch (final Exception e) { + log.error("SOLR Search Query: {} failed with message: {}", dto.getQuery(), e.getMessage()); + } + + return results; + } + + public void indexConceptSetsFull(final Collection documents) { + try { + final SolrClient solrClient = solrSearchClient.getSolrClient(conceptSetsCore); + + // delete all before index + solrClient.deleteByQuery("*:*"); + + documents.forEach(d -> addDocumentToIndex(solrClient, d)); + solrClient.commit(); + + } catch (final Exception e) { + log.error("SOLR error: concept sets index failed with message: {}", e.getMessage()); + } + } + + public void reindexConceptSet(final Integer conceptSetId, final Collection documents) { + try { + final SolrClient solrClient = solrSearchClient.getSolrClient(conceptSetsCore); + + solrClient.deleteByQuery("concept_set_id:" + conceptSetId); + documents.forEach(d -> addDocumentToIndex(solrClient, d)); + solrClient.commit(); + + } catch (final Exception e) { + log.error("SOLR error: concept set {} index failed with message: {}", conceptSetId, 
e.getMessage()); + } + } + + public void deleteConceptSetIndex(final Integer conceptSetId) { + try { + final SolrClient solrClient = solrSearchClient.getSolrClient(conceptSetsCore); + + solrClient.deleteByQuery("concept_set_id:" + conceptSetId); + solrClient.commit(); + + } catch (final Exception e) { + log.error("SOLR error: concept set {} index failed with message: {}", conceptSetId, e.getMessage()); + } + } + + private String composeSearchQuery(final ConceptSetSearchDTO dto) { + String searchQuery = solrSearchClient.formatSearchQuery(dto.getQuery().trim()); + + if (dto.getDomainId() != null && dto.getDomainId().length > 0) { + searchQuery += " AND domain_name:(" + String.join(" OR ", dto.getDomainId()) + ")"; + } + + return searchQuery; + } + + private void addDocumentToIndex(final SolrClient solrClient, final ConceptSetSearchDocument searchDocument) { + try { + final SolrInputDocument document = new SolrInputDocument(); + document.addField("concept_set_id", searchDocument.getConceptSetId()); + document.addField("concept_id", searchDocument.getConceptId()); + document.addField("concept_name", searchDocument.getConceptName()); + document.addField("concept_code", searchDocument.getConceptCode()); + document.addField("domain_name", searchDocument.getDomainName()); + solrClient.add(document); + } catch (final Exception e) { + log.error("SOLR error: cannot add document to index: {}", e.getMessage()); + } + } +} \ No newline at end of file diff --git a/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java b/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java index a9cdf00f49..d0389502f7 100644 --- a/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java +++ b/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java @@ -17,6 +17,7 @@ import java.io.ByteArrayOutputStream; import java.util.*; +import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.StreamSupport; import javax.transaction.Transactional; 
@@ -34,6 +35,8 @@ import org.ohdsi.webapi.conceptset.ConceptSetGenerationInfoRepository; import org.ohdsi.webapi.conceptset.ConceptSetItem; import org.ohdsi.webapi.conceptset.dto.ConceptSetVersionFullDTO; +import org.ohdsi.webapi.conceptset.search.ConceptSetSearchDocument; +import org.ohdsi.webapi.conceptset.search.ConceptSetSearchService; import org.ohdsi.webapi.exception.ConceptNotExistException; import org.ohdsi.webapi.security.PermissionService; import org.ohdsi.webapi.service.dto.ConceptSetDTO; @@ -50,7 +53,6 @@ import org.ohdsi.webapi.util.ExportUtil; import org.ohdsi.webapi.util.NameUtils; import org.ohdsi.webapi.util.ExceptionUtils; -import org.ohdsi.webapi.util.PreparedStatementRenderer; import org.ohdsi.webapi.versioning.domain.ConceptSetVersion; import org.ohdsi.webapi.versioning.domain.Version; import org.ohdsi.webapi.versioning.domain.VersionBase; @@ -62,6 +64,7 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.core.convert.support.GenericConversionService; import org.springframework.dao.EmptyResultDataAccessException; +import org.springframework.scheduling.annotation.Async; import org.springframework.stereotype.Component; /** @@ -81,6 +84,9 @@ public class ConceptSetService extends AbstractDaoService implements HasTags concepts = csExport.mappedConcepts.stream() + .map(item -> { + final ConceptSetSearchDocument concept = new ConceptSetSearchDocument(); + concept.setConceptSetId(id); + concept.setConceptId(item.conceptId); + concept.setConceptName(item.conceptName); + concept.setConceptCode(item.conceptCode); + concept.setDomainName(item.domainId); + return concept; + }).collect(Collectors.toList()); + + conceptSetSearchService.reindexConceptSet(id, concepts); + return true; } @@ -607,6 +629,9 @@ public void deleteConceptSet(@PathParam("id") final int id) { catch (Exception e) { throw e; } + + // Delete CS index + conceptSetSearchService.deleteConceptSetIndex(id); } /** @@ -828,23 +853,69 @@ public List 
listByTags(TagNameListRequestDTO requestDTO) { return listByTags(entities, names, ConceptSetDTO.class); } + /** + * Search for concept sets. + * + * @summary Search for a concept bt search string and domains (search among containing concepts) + * @param sourceKey The source key + * @param search The ConceptSetSearchDTO + * @return A collection of concept sets + */ + @Path("{sourceKey}/search") + @POST + @Produces(MediaType.APPLICATION_JSON) + @Consumes(MediaType.APPLICATION_JSON) + @Transactional + public Collection executeSearch(@PathParam("sourceKey") String sourceKey, ConceptSetSearchDTO search) { + final Set ids = conceptSetSearchService.searchConceptSets(search); + + if (ids.isEmpty()) { + return Collections.emptyList(); + } - /** - * Search for concept sets. - * - * @summary Search for a concept bt search string and domains (search among containing concepts) - * @param sourceKey The source key - * @param search The ConceptSetSearchDTO - * @return A collection of concept sets - */ - @Path("{sourceKey}/search") - @POST - @Produces(MediaType.APPLICATION_JSON) - @Consumes(MediaType.APPLICATION_JSON) - public Collection executeSearch(@PathParam("sourceKey") String sourceKey, ConceptSetSearchDTO search) { - /* todo implement search */ - return getConceptSets(); - } + return getConceptSetRepository().findAllById(ids).stream() + .map(conceptSet -> { + ConceptSetDTO dto = conversionService.convert(conceptSet, ConceptSetDTO.class); + permissionService.fillWriteAccess(conceptSet, dto); + return dto; + }) + .collect(Collectors.toList()); + } + + /** + * Index concept sets for search. 
+ * + * @summary Index all concept sets for search + * @param sourceKey The source key + */ + @Path("{sourceKey}/index") + @GET + @Produces(MediaType.APPLICATION_JSON) + @Consumes(MediaType.APPLICATION_JSON) + public void fullIndex(@PathParam("sourceKey") String sourceKey) { + final Collection documents = new ArrayList<>(); + + getConceptSetRepository().findAll().forEach(cs -> { + final ConceptSetExpression csExpression = this.getConceptSetExpression(cs.getId()); + final Collection concepts = vocabService.executeMappedLookup(sourceKey, csExpression); + + documents.addAll(concepts.stream().map(item -> { + final ConceptSetSearchDocument concept = new ConceptSetSearchDocument(); + concept.setConceptSetId(cs.getId()); + concept.setConceptId(item.conceptId); + concept.setConceptName(item.conceptName); + concept.setConceptCode(item.conceptCode); + concept.setDomainName(item.domainId); + return concept; + }).collect(Collectors.toList())); + + log.info("Concept set {} added to reindex", cs.getId()); + }); + + log.info("Full reindex start"); + conceptSetSearchService.indexConceptSetsFull(documents); + log.info("Full reindex finish"); + } private void checkVersion(int id, int version) { checkVersion(id, version, true); diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 02791a3dcd..be94cb4497 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -96,6 +96,7 @@ security.cas.casticket=${security.cas.casticket} # Full Text Search settings solr.endpoint = ${solr.endpoint} solr.query.prefix = ${solr.query.prefix} +solr.conceptsets.core = ${solr.conceptsets.core} # Enabling Compression compression=on compressableMimeType=application/json,application/xml,text/html,text/xml,text/plain diff --git a/src/main/resources/db/migration/postgresql/V2.13.0.20221031170000__add_concept_set_search_permission.sql 
b/src/main/resources/db/migration/postgresql/V2.13.0.20221031170000__add_concept_set_search_permission.sql index a0b704cf0c..e2848f20f2 100644 --- a/src/main/resources/db/migration/postgresql/V2.13.0.20221031170000__add_concept_set_search_permission.sql +++ b/src/main/resources/db/migration/postgresql/V2.13.0.20221031170000__add_concept_set_search_permission.sql @@ -1,7 +1,14 @@ INSERT INTO ${ohdsiSchema}.sec_permission(id, value, description) VALUES (nextval('${ohdsiSchema}.sec_permission_id_seq'), 'conceptset:*:search', 'Concept sets search permission'); +INSERT INTO ${ohdsiSchema}.sec_permission(id, value, description) VALUES + (nextval('${ohdsiSchema}.sec_permission_id_seq'), 'conceptset:*:index', 'Concept sets search index permission'); + +INSERT INTO ${ohdsiSchema}.sec_role_permission(id, role_id, permission_id) +SELECT nextval('${ohdsiSchema}.sec_role_permission_sequence'), sr.id, sp.id +FROM ${ohdsiSchema}.sec_permission SP, ${ohdsiSchema}.sec_role sr +WHERE sp.value = 'conceptset:*:search' AND sr.name IN ('Atlas users'); INSERT INTO ${ohdsiSchema}.sec_role_permission(id, role_id, permission_id) SELECT nextval('${ohdsiSchema}.sec_role_permission_sequence'), sr.id, sp.id FROM ${ohdsiSchema}.sec_permission SP, ${ohdsiSchema}.sec_role sr -WHERE sp.value = 'conceptset:*:search' AND sr.name IN ('Atlas users'); \ No newline at end of file +WHERE sp.value = 'conceptset:*:index' AND sr.name IN ('admin'); \ No newline at end of file diff --git a/src/main/resources/solr/conf/dataimport.properties b/src/main/resources/solr/vocabulary/conf/dataimport.properties similarity index 100% rename from src/main/resources/solr/conf/dataimport.properties rename to src/main/resources/solr/vocabulary/conf/dataimport.properties diff --git a/src/main/resources/solr/conf/lang/contractions_ca.txt b/src/main/resources/solr/vocabulary/conf/lang/contractions_ca.txt similarity index 100% rename from src/main/resources/solr/conf/lang/contractions_ca.txt rename to 
src/main/resources/solr/vocabulary/conf/lang/contractions_ca.txt diff --git a/src/main/resources/solr/conf/lang/contractions_fr.txt b/src/main/resources/solr/vocabulary/conf/lang/contractions_fr.txt similarity index 100% rename from src/main/resources/solr/conf/lang/contractions_fr.txt rename to src/main/resources/solr/vocabulary/conf/lang/contractions_fr.txt diff --git a/src/main/resources/solr/conf/lang/contractions_ga.txt b/src/main/resources/solr/vocabulary/conf/lang/contractions_ga.txt similarity index 100% rename from src/main/resources/solr/conf/lang/contractions_ga.txt rename to src/main/resources/solr/vocabulary/conf/lang/contractions_ga.txt diff --git a/src/main/resources/solr/conf/lang/contractions_it.txt b/src/main/resources/solr/vocabulary/conf/lang/contractions_it.txt similarity index 100% rename from src/main/resources/solr/conf/lang/contractions_it.txt rename to src/main/resources/solr/vocabulary/conf/lang/contractions_it.txt diff --git a/src/main/resources/solr/conf/lang/hyphenations_ga.txt b/src/main/resources/solr/vocabulary/conf/lang/hyphenations_ga.txt similarity index 100% rename from src/main/resources/solr/conf/lang/hyphenations_ga.txt rename to src/main/resources/solr/vocabulary/conf/lang/hyphenations_ga.txt diff --git a/src/main/resources/solr/conf/lang/stemdict_nl.txt b/src/main/resources/solr/vocabulary/conf/lang/stemdict_nl.txt similarity index 100% rename from src/main/resources/solr/conf/lang/stemdict_nl.txt rename to src/main/resources/solr/vocabulary/conf/lang/stemdict_nl.txt diff --git a/src/main/resources/solr/conf/lang/stoptags_ja.txt b/src/main/resources/solr/vocabulary/conf/lang/stoptags_ja.txt similarity index 100% rename from src/main/resources/solr/conf/lang/stoptags_ja.txt rename to src/main/resources/solr/vocabulary/conf/lang/stoptags_ja.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_ar.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_ar.txt similarity index 100% rename from 
src/main/resources/solr/conf/lang/stopwords_ar.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_ar.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_bg.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_bg.txt similarity index 100% rename from src/main/resources/solr/conf/lang/stopwords_bg.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_bg.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_ca.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_ca.txt similarity index 100% rename from src/main/resources/solr/conf/lang/stopwords_ca.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_ca.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_cz.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_cz.txt similarity index 100% rename from src/main/resources/solr/conf/lang/stopwords_cz.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_cz.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_da.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_da.txt similarity index 100% rename from src/main/resources/solr/conf/lang/stopwords_da.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_da.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_de.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_de.txt similarity index 100% rename from src/main/resources/solr/conf/lang/stopwords_de.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_de.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_el.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_el.txt similarity index 100% rename from src/main/resources/solr/conf/lang/stopwords_el.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_el.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_en.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_en.txt similarity index 100% rename from 
src/main/resources/solr/conf/lang/stopwords_en.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_en.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_es.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_es.txt similarity index 100% rename from src/main/resources/solr/conf/lang/stopwords_es.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_es.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_eu.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_eu.txt similarity index 100% rename from src/main/resources/solr/conf/lang/stopwords_eu.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_eu.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_fa.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_fa.txt similarity index 100% rename from src/main/resources/solr/conf/lang/stopwords_fa.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_fa.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_fi.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_fi.txt similarity index 100% rename from src/main/resources/solr/conf/lang/stopwords_fi.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_fi.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_fr.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_fr.txt similarity index 100% rename from src/main/resources/solr/conf/lang/stopwords_fr.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_fr.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_ga.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_ga.txt similarity index 100% rename from src/main/resources/solr/conf/lang/stopwords_ga.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_ga.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_gl.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_gl.txt similarity index 100% rename from 
src/main/resources/solr/conf/lang/stopwords_gl.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_gl.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_hi.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_hi.txt similarity index 100% rename from src/main/resources/solr/conf/lang/stopwords_hi.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_hi.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_hu.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_hu.txt similarity index 100% rename from src/main/resources/solr/conf/lang/stopwords_hu.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_hu.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_hy.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_hy.txt similarity index 100% rename from src/main/resources/solr/conf/lang/stopwords_hy.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_hy.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_id.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_id.txt similarity index 100% rename from src/main/resources/solr/conf/lang/stopwords_id.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_id.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_it.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_it.txt similarity index 100% rename from src/main/resources/solr/conf/lang/stopwords_it.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_it.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_ja.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_ja.txt similarity index 100% rename from src/main/resources/solr/conf/lang/stopwords_ja.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_ja.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_lv.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_lv.txt similarity index 100% rename from 
src/main/resources/solr/conf/lang/stopwords_lv.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_lv.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_nl.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_nl.txt similarity index 100% rename from src/main/resources/solr/conf/lang/stopwords_nl.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_nl.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_no.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_no.txt similarity index 100% rename from src/main/resources/solr/conf/lang/stopwords_no.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_no.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_pt.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_pt.txt similarity index 100% rename from src/main/resources/solr/conf/lang/stopwords_pt.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_pt.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_ro.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_ro.txt similarity index 100% rename from src/main/resources/solr/conf/lang/stopwords_ro.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_ro.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_ru.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_ru.txt similarity index 100% rename from src/main/resources/solr/conf/lang/stopwords_ru.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_ru.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_sv.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_sv.txt similarity index 100% rename from src/main/resources/solr/conf/lang/stopwords_sv.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_sv.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_th.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_th.txt similarity index 100% rename from 
src/main/resources/solr/conf/lang/stopwords_th.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_th.txt diff --git a/src/main/resources/solr/conf/lang/stopwords_tr.txt b/src/main/resources/solr/vocabulary/conf/lang/stopwords_tr.txt similarity index 100% rename from src/main/resources/solr/conf/lang/stopwords_tr.txt rename to src/main/resources/solr/vocabulary/conf/lang/stopwords_tr.txt diff --git a/src/main/resources/solr/conf/lang/userdict_ja.txt b/src/main/resources/solr/vocabulary/conf/lang/userdict_ja.txt similarity index 100% rename from src/main/resources/solr/conf/lang/userdict_ja.txt rename to src/main/resources/solr/vocabulary/conf/lang/userdict_ja.txt diff --git a/src/main/resources/solr/conf/managed-schema b/src/main/resources/solr/vocabulary/conf/managed-schema similarity index 100% rename from src/main/resources/solr/conf/managed-schema rename to src/main/resources/solr/vocabulary/conf/managed-schema diff --git a/src/main/resources/solr/conf/params.json b/src/main/resources/solr/vocabulary/conf/params.json similarity index 100% rename from src/main/resources/solr/conf/params.json rename to src/main/resources/solr/vocabulary/conf/params.json diff --git a/src/main/resources/solr/conf/protwords.txt b/src/main/resources/solr/vocabulary/conf/protwords.txt similarity index 100% rename from src/main/resources/solr/conf/protwords.txt rename to src/main/resources/solr/vocabulary/conf/protwords.txt diff --git a/src/main/resources/solr/conf/solrconfig.xml b/src/main/resources/solr/vocabulary/conf/solrconfig.xml similarity index 100% rename from src/main/resources/solr/conf/solrconfig.xml rename to src/main/resources/solr/vocabulary/conf/solrconfig.xml diff --git a/src/main/resources/solr/conf/stopwords.txt b/src/main/resources/solr/vocabulary/conf/stopwords.txt similarity index 100% rename from src/main/resources/solr/conf/stopwords.txt rename to src/main/resources/solr/vocabulary/conf/stopwords.txt diff --git 
a/src/main/resources/solr/conf/synonyms.txt b/src/main/resources/solr/vocabulary/conf/synonyms.txt similarity index 100% rename from src/main/resources/solr/conf/synonyms.txt rename to src/main/resources/solr/vocabulary/conf/synonyms.txt diff --git a/src/main/resources/solr/core.properties b/src/main/resources/solr/vocabulary/core.properties similarity index 100% rename from src/main/resources/solr/core.properties rename to src/main/resources/solr/vocabulary/core.properties diff --git a/src/main/resources/solr/data-config.xml b/src/main/resources/solr/vocabulary/data-config.xml similarity index 100% rename from src/main/resources/solr/data-config.xml rename to src/main/resources/solr/vocabulary/data-config.xml From 84ceab5bce8d7cc94cbd6ecb0de50ee6a0d35ffd Mon Sep 17 00:00:00 2001 From: Anton Abushkevich Date: Tue, 22 Nov 2022 01:53:16 +0300 Subject: [PATCH 03/27] Concept sets search - solr core config --- .../conf/lang/contractions_ca.txt | 8 + .../conf/lang/contractions_fr.txt | 15 + .../conf/lang/contractions_ga.txt | 5 + .../conf/lang/contractions_it.txt | 23 + .../conf/lang/hyphenations_ga.txt | 5 + .../concept-sets/conf/lang/stemdict_nl.txt | 6 + .../concept-sets/conf/lang/stoptags_ja.txt | 420 +++++ .../concept-sets/conf/lang/stopwords_ar.txt | 125 ++ .../concept-sets/conf/lang/stopwords_bg.txt | 193 +++ .../concept-sets/conf/lang/stopwords_ca.txt | 220 +++ .../concept-sets/conf/lang/stopwords_cz.txt | 172 +++ .../concept-sets/conf/lang/stopwords_da.txt | 110 ++ .../concept-sets/conf/lang/stopwords_de.txt | 294 ++++ .../concept-sets/conf/lang/stopwords_el.txt | 78 + .../concept-sets/conf/lang/stopwords_en.txt | 22 + .../concept-sets/conf/lang/stopwords_es.txt | 356 +++++ .../concept-sets/conf/lang/stopwords_eu.txt | 99 ++ .../concept-sets/conf/lang/stopwords_fa.txt | 313 ++++ .../concept-sets/conf/lang/stopwords_fi.txt | 97 ++ .../concept-sets/conf/lang/stopwords_fr.txt | 186 +++ .../concept-sets/conf/lang/stopwords_ga.txt | 110 ++ 
.../concept-sets/conf/lang/stopwords_gl.txt | 161 ++ .../concept-sets/conf/lang/stopwords_hi.txt | 235 +++ .../concept-sets/conf/lang/stopwords_hu.txt | 211 +++ .../concept-sets/conf/lang/stopwords_hy.txt | 46 + .../concept-sets/conf/lang/stopwords_id.txt | 359 +++++ .../concept-sets/conf/lang/stopwords_it.txt | 303 ++++ .../concept-sets/conf/lang/stopwords_ja.txt | 127 ++ .../concept-sets/conf/lang/stopwords_lv.txt | 172 +++ .../concept-sets/conf/lang/stopwords_nl.txt | 119 ++ .../concept-sets/conf/lang/stopwords_no.txt | 194 +++ .../concept-sets/conf/lang/stopwords_pt.txt | 253 +++ .../concept-sets/conf/lang/stopwords_ro.txt | 233 +++ .../concept-sets/conf/lang/stopwords_ru.txt | 243 +++ .../concept-sets/conf/lang/stopwords_sv.txt | 133 ++ .../concept-sets/conf/lang/stopwords_th.txt | 119 ++ .../concept-sets/conf/lang/stopwords_tr.txt | 212 +++ .../concept-sets/conf/lang/userdict_ja.txt | 29 + .../solr/concept-sets/conf/managed-schema | 985 ++++++++++++ .../solr/concept-sets/conf/params.json | 20 + .../solr/concept-sets/conf/protwords.txt | 21 + .../solr/concept-sets/conf/solrconfig.xml | 1357 +++++++++++++++++ .../solr/concept-sets/conf/stopwords.txt | 14 + .../solr/concept-sets/conf/synonyms.txt | 29 + 44 files changed, 8432 insertions(+) create mode 100644 src/main/resources/solr/concept-sets/conf/lang/contractions_ca.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/contractions_fr.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/contractions_ga.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/contractions_it.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/hyphenations_ga.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stemdict_nl.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stoptags_ja.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_ar.txt create mode 100644 
src/main/resources/solr/concept-sets/conf/lang/stopwords_bg.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_ca.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_cz.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_da.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_de.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_el.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_en.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_es.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_eu.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_fa.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_fi.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_fr.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_ga.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_gl.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_hi.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_hu.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_hy.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_id.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_it.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_ja.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_lv.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_nl.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_no.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_pt.txt create mode 100644 
src/main/resources/solr/concept-sets/conf/lang/stopwords_ro.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_ru.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_sv.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_th.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/stopwords_tr.txt create mode 100644 src/main/resources/solr/concept-sets/conf/lang/userdict_ja.txt create mode 100644 src/main/resources/solr/concept-sets/conf/managed-schema create mode 100644 src/main/resources/solr/concept-sets/conf/params.json create mode 100644 src/main/resources/solr/concept-sets/conf/protwords.txt create mode 100644 src/main/resources/solr/concept-sets/conf/solrconfig.xml create mode 100644 src/main/resources/solr/concept-sets/conf/stopwords.txt create mode 100644 src/main/resources/solr/concept-sets/conf/synonyms.txt diff --git a/src/main/resources/solr/concept-sets/conf/lang/contractions_ca.txt b/src/main/resources/solr/concept-sets/conf/lang/contractions_ca.txt new file mode 100644 index 0000000000..307a85f913 --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/contractions_ca.txt @@ -0,0 +1,8 @@ +# Set of Catalan contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +l +m +n +s +t diff --git a/src/main/resources/solr/concept-sets/conf/lang/contractions_fr.txt b/src/main/resources/solr/concept-sets/conf/lang/contractions_fr.txt new file mode 100644 index 0000000000..f1bba51b23 --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/contractions_fr.txt @@ -0,0 +1,15 @@ +# Set of French contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +l +m +t +qu +n +s +j +d +c +jusqu +quoiqu +lorsqu +puisqu diff --git a/src/main/resources/solr/concept-sets/conf/lang/contractions_ga.txt 
b/src/main/resources/solr/concept-sets/conf/lang/contractions_ga.txt new file mode 100644 index 0000000000..9ebe7fa349 --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/contractions_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +m +b diff --git a/src/main/resources/solr/concept-sets/conf/lang/contractions_it.txt b/src/main/resources/solr/concept-sets/conf/lang/contractions_it.txt new file mode 100644 index 0000000000..cac0409537 --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/contractions_it.txt @@ -0,0 +1,23 @@ +# Set of Italian contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +c +l +all +dall +dell +nell +sull +coll +pell +gl +agl +dagl +degl +negl +sugl +un +m +t +s +v +d diff --git a/src/main/resources/solr/concept-sets/conf/lang/hyphenations_ga.txt b/src/main/resources/solr/concept-sets/conf/lang/hyphenations_ga.txt new file mode 100644 index 0000000000..4d2642cc5a --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/hyphenations_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish hyphenations for StopFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +h +n +t diff --git a/src/main/resources/solr/concept-sets/conf/lang/stemdict_nl.txt b/src/main/resources/solr/concept-sets/conf/lang/stemdict_nl.txt new file mode 100644 index 0000000000..441072971d --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stemdict_nl.txt @@ -0,0 +1,6 @@ +# Set of overrides for the dutch stemmer +# TODO: load this as a resource from the analyzer and sync it in build.xml +fiets fiets +bromfiets bromfiets +ei eier +kind kinder diff --git a/src/main/resources/solr/concept-sets/conf/lang/stoptags_ja.txt b/src/main/resources/solr/concept-sets/conf/lang/stoptags_ja.txt new file mode 100644 index 0000000000..71b750845e --- /dev/null +++ 
b/src/main/resources/solr/concept-sets/conf/lang/stoptags_ja.txt @@ -0,0 +1,420 @@ +# +# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. +# +# Any token with a part-of-speech tag that exactly matches those defined in this +# file are removed from the token stream. +# +# Set your own stoptags by uncommenting the lines below. Note that comments are +# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, +# etc. that can be useful for building your own stoptag set. +# +# The entire possible tagset is provided below for convenience. +# +##### +# noun: unclassified nouns +#名詞 +# +# noun-common: Common nouns or nouns where the sub-classification is undefined +#名詞-一般 +# +# noun-proper: Proper nouns where the sub-classification is undefined +#名詞-固有名詞 +# +# noun-proper-misc: miscellaneous proper nouns +#名詞-固有名詞-一般 +# +# noun-proper-person: Personal names where the sub-classification is undefined +#名詞-固有名詞-人名 +# +# noun-proper-person-misc: names that cannot be divided into surname and +# given name; foreign names; names where the surname or given name is unknown. +# e.g. お市の方 +#名詞-固有名詞-人名-一般 +# +# noun-proper-person-surname: Mainly Japanese surnames. +# e.g. 山田 +#名詞-固有名詞-人名-姓 +# +# noun-proper-person-given_name: Mainly Japanese given names. +# e.g. 太郎 +#名詞-固有名詞-人名-名 +# +# noun-proper-organization: Names representing organizations. +# e.g. 通産省, NHK +#名詞-固有名詞-組織 +# +# noun-proper-place: Place names where the sub-classification is undefined +#名詞-固有名詞-地域 +# +# noun-proper-place-misc: Place names excluding countries. +# e.g. アジア, バルセロナ, 京都 +#名詞-固有名詞-地域-一般 +# +# noun-proper-place-country: Country names. +# e.g. 日本, オーストラリア +#名詞-固有名詞-地域-国 +# +# noun-pronoun: Pronouns where the sub-classification is undefined +#名詞-代名詞 +# +# noun-pronoun-misc: miscellaneous pronouns: +# e.g. 
それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ +#名詞-代名詞-一般 +# +# noun-pronoun-contraction: Spoken language contraction made by combining a +# pronoun and the particle 'wa'. +# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ +#名詞-代名詞-縮約 +# +# noun-adverbial: Temporal nouns such as names of days or months that behave +# like adverbs. Nouns that represent amount or ratios and can be used adverbially, +# e.g. 金曜, 一月, 午後, 少量 +#名詞-副詞可能 +# +# noun-verbal: Nouns that take arguments with case and can appear followed by +# 'suru' and related verbs (する, できる, なさる, くださる) +# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り +#名詞-サ変接続 +# +# noun-adjective-base: The base form of adjectives, words that appear before な ("na") +# e.g. 健康, 安易, 駄目, だめ +#名詞-形容動詞語幹 +# +# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. +# e.g. 0, 1, 2, 何, 数, 幾 +#名詞-数 +# +# noun-affix: noun affixes where the sub-classification is undefined +#名詞-非自立 +# +# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that +# attach to the base form of inflectional words, words that cannot be classified +# into any of the other categories below. This category includes indefinite nouns. +# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, +# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, +# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳, +# わり, 割り, 割, ん-口語/, もん-口語/ +#名詞-非自立-一般 +# +# noun-affix-adverbial: noun affixes that can behave as adverbs. +# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, +# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, +# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, +# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, +# 儘, 侭, みぎり, 矢先 +#名詞-非自立-副詞可能 +# +# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars +# with the stem よう(だ) ("you(da)"). +# e.g. 
よう, やう, 様 (よう) +#名詞-非自立-助動詞語幹 +# +# noun-affix-adjective-base: noun affixes that can connect to the indeclinable +# connection form な (aux "da"). +# e.g. みたい, ふう +#名詞-非自立-形容動詞語幹 +# +# noun-special: special nouns where the sub-classification is undefined. +#名詞-特殊 +# +# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is +# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base +# form of inflectional words. +# e.g. そう +#名詞-特殊-助動詞語幹 +# +# noun-suffix: noun suffixes where the sub-classification is undefined. +#名詞-接尾 +# +# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect +# to ガル or タイ and can combine into compound nouns, words that cannot be classified into +# any of the other categories below. In general, this category is more inclusive than +# 接尾語 ("suffix") and is usually the last element in a compound noun. +# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み, +# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用 +#名詞-接尾-一般 +# +# noun-suffix-person: Suffixes that form nouns and attach to person names more often +# than other nouns. +# e.g. 君, 様, 著 +#名詞-接尾-人名 +# +# noun-suffix-place: Suffixes that form nouns and attach to place names more often +# than other nouns. +# e.g. 町, 市, 県 +#名詞-接尾-地域 +# +# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that +# can appear before スル ("suru"). +# e.g. 化, 視, 分け, 入り, 落ち, 買い +#名詞-接尾-サ変接続 +# +# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, +# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the +# conjunctive form of inflectional words. +# e.g. そう +#名詞-接尾-助動詞語幹 +# +# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive +# form of inflectional words and appear before the copula だ ("da"). +# e.g. 的, げ, がち +#名詞-接尾-形容動詞語幹 +# +# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. +# e.g. 
後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ) +#名詞-接尾-副詞可能 +# +# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category +# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach +# to numbers. +# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半 +#名詞-接尾-助数詞 +# +# noun-suffix-special: Special suffixes that mainly attach to inflecting words. +# e.g. (楽し) さ, (考え) 方 +#名詞-接尾-特殊 +# +# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words +# together. +# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦) +#名詞-接続詞的 +# +# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are +# semantically verb-like. +# e.g. ごらん, ご覧, 御覧, 頂戴 +#名詞-動詞非自立的 +# +# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, +# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") +# is いわく ("iwaku"). +#名詞-引用文字列 +# +# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and +# behave like an adjective. +# e.g. 申し訳, 仕方, とんでも, 違い +#名詞-ナイ形容詞語幹 +# +##### +# prefix: unclassified prefixes +#接頭詞 +# +# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) +# excluding numerical expressions. +# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派) +#接頭詞-名詞接続 +# +# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb +# in conjunctive form followed by なる/なさる/くださる. +# e.g. お (読みなさい), お (座り) +#接頭詞-動詞接続 +# +# prefix-adjectival: Prefixes that attach to adjectives. +# e.g. お (寒いですねえ), バカ (でかい) +#接頭詞-形容詞接続 +# +# prefix-numerical: Prefixes that attach to numerical expressions. +# e.g. 
約, およそ, 毎時 +#接頭詞-数接続 +# +##### +# verb: unclassified verbs +#動詞 +# +# verb-main: +#動詞-自立 +# +# verb-auxiliary: +#動詞-非自立 +# +# verb-suffix: +#動詞-接尾 +# +##### +# adjective: unclassified adjectives +#形容詞 +# +# adjective-main: +#形容詞-自立 +# +# adjective-auxiliary: +#形容詞-非自立 +# +# adjective-suffix: +#形容詞-接尾 +# +##### +# adverb: unclassified adverbs +#副詞 +# +# adverb-misc: Words that can be segmented into one unit and where adnominal +# modification is not possible. +# e.g. あいかわらず, 多分 +#副詞-一般 +# +# adverb-particle_conjunction: Adverbs that can be followed by の, は, に, +# な, する, だ, etc. +# e.g. こんなに, そんなに, あんなに, なにか, なんでも +#副詞-助詞類接続 +# +##### +# adnominal: Words that only have noun-modifying forms. +# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, +# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, +# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き +#連体詞 +# +##### +# conjunction: Conjunctions that can occur independently. +# e.g. が, けれども, そして, じゃあ, それどころか +接続詞 +# +##### +# particle: unclassified particles. +助詞 +# +# particle-case: case particles where the subclassification is undefined. +助詞-格助詞 +# +# particle-case-misc: Case particles. +# e.g. から, が, で, と, に, へ, より, を, の, にて +助詞-格助詞-一般 +# +# particle-case-quote: the "to" that appears after nouns, a person’s speech, +# quotation marks, expressions of decisions from a meeting, reasons, judgements, +# conjectures, etc. +# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...) +助詞-格助詞-引用 +# +# particle-case-compound: Compounds of particles and verbs that mainly behave +# like case particles. +# e.g. 
という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って, +# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, +# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, +# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, +# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって, +# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, +# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる, +# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ +助詞-格助詞-連語 +# +# particle-conjunctive: +# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, +# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, +# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/ +助詞-接続助詞 +# +# particle-dependency: +# e.g. こそ, さえ, しか, すら, は, も, ぞ +助詞-係助詞 +# +# particle-adverbial: +# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, +# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/, +# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, +# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/, +# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」) +助詞-副助詞 +# +# particle-interjective: particles with interjective grammatical roles. +# e.g. (松島) や +助詞-間投助詞 +# +# particle-coordinate: +# e.g. と, たり, だの, だり, とか, なり, や, やら +助詞-並立助詞 +# +# particle-final: +# e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, +# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/ +助詞-終助詞 +# +# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is +# adverbial, conjunctive, or sentence final. For example: +# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」 +# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」 +# 「(祈りが届いたせい) か (, 試験に合格した.)」 +# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」 +# e.g. か +助詞-副助詞/並立助詞/終助詞 +# +# particle-adnominalizer: The "no" that attaches to nouns and modifies +# non-inflectional words. 
+助詞-連体化 +# +# particle-adverbializer: The "ni" and "to" that appear following nouns and adverbs +# that are giongo, giseigo, or gitaigo. +# e.g. に, と +助詞-副詞化 +# +# particle-special: A particle that does not fit into one of the above classifications. +# This includes particles that are used in Tanka, Haiku, and other poetry. +# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家) +助詞-特殊 +# +##### +# auxiliary-verb: +助動詞 +# +##### +# interjection: Greetings and other exclamations. +# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, +# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい +#感動詞 +# +##### +# symbol: unclassified Symbols. +記号 +# +# symbol-misc: A general symbol not in one of the categories below. +# e.g. [○◎@$〒→+] +記号-一般 +# +# symbol-comma: Commas +# e.g. [,、] +記号-読点 +# +# symbol-period: Periods and full stops. +# e.g. [..。] +記号-句点 +# +# symbol-space: Full-width whitespace. +記号-空白 +# +# symbol-open_bracket: +# e.g. [({‘“『【] +記号-括弧開 +# +# symbol-close_bracket: +# e.g. [)}’”』」】] +記号-括弧閉 +# +# symbol-alphabetic: +#記号-アルファベット +# +##### +# other: unclassified other +#その他 +# +# other-interjection: Words that are hard to classify as noun-suffixes or +# sentence-final particles. +# e.g. (だ)ァ +その他-間投 +# +##### +# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. +# e.g. あの, うんと, えと +フィラー +# +##### +# non-verbal: non-verbal sound. +非言語音 +# +##### +# fragment: +#語断片 +# +##### +# unknown: unknown part of speech. +#未知語 +# +##### End of file diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_ar.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_ar.txt new file mode 100644 index 0000000000..046829db6a --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_ar.txt @@ -0,0 +1,125 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +# Cleaned on October 11, 2009 (not normalized, so use before normalization) +# This means that when modifying this list, you might need to add some +# redundant entries, for example containing forms with both أ and ا +من +ومن +منها +منه +في +وفي +فيها +فيه +و +ف +ثم +او +أو +ب +بها +به +ا +أ +اى +اي +أي +أى +لا +ولا +الا +ألا +إلا +لكن +ما +وما +كما +فما +عن +مع +اذا +إذا +ان +أن +إن +انها +أنها +إنها +انه +أنه +إنه +بان +بأن +فان +فأن +وان +وأن +وإن +التى +التي +الذى +الذي +الذين +الى +الي +إلى +إلي +على +عليها +عليه +اما +أما +إما +ايضا +أيضا +كل +وكل +لم +ولم +لن +ولن +هى +هي +هو +وهى +وهي +وهو +فهى +فهي +فهو +انت +أنت +لك +لها +له +هذه +هذا +تلك +ذلك +هناك +كانت +كان +يكون +تكون +وكانت +وكان +غير +بعض +قد +نحو +بين +بينما +منذ +ضمن +حيث +الان +الآن +خلال +بعد +قبل +حتى +عند +عندما +لدى +جميع diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_bg.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_bg.txt new file mode 100644 index 0000000000..1ae4ba2ae3 --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_bg.txt @@ -0,0 +1,193 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +а +аз +ако +ала +бе +без +беше +би +бил +била +били +било +близо +бъдат +бъде +бяха +в +вас +ваш +ваша +вероятно +вече +взема +ви +вие +винаги +все +всеки +всички +всичко +всяка +във +въпреки +върху +г +ги +главно +го +д +да +дали +до +докато +докога +дори +досега +доста +е +едва +един +ето +за +зад +заедно +заради +засега +затова +защо +защото +и +из +или +им +има +имат +иска +й +каза +как +каква +какво +както +какъв +като +кога +когато +което +които +кой +който +колко +която +къде +където +към +ли +м +ме +между +мен +ми +мнозина +мога +могат +може +моля +момента +му +н +на +над +назад +най +направи +напред +например +нас +не +него +нея +ни +ние +никой +нито +но +някои +някой +няма +обаче +около +освен +особено +от +отгоре +отново +още +пак +по +повече +повечето +под +поне +поради +после +почти +прави +пред +преди +през +при +пък +първо +с +са +само +се +сега +си +скоро +след +сме +според +сред +срещу +сте +съм +със +също +т +тази +така +такива +такъв +там +твой +те +тези +ти +тн +то +това +тогава +този +той +толкова +точно +трябва +тук +тъй +тя +тях +у +харесва +ч +че +често +чрез +ще +щом +я diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_ca.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_ca.txt new file mode 100644 index 0000000000..3da65deafe --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_ca.txt @@ -0,0 +1,220 @@ +# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) +a +abans +ací +ah +així +això +al +als +aleshores +algun +alguna +algunes +alguns +alhora +allà +allí +allò +altra +altre +altres +amb +ambdós +ambdues +apa +aquell +aquella +aquelles +aquells +aquest +aquesta +aquestes +aquests +aquí +baix +cada +cadascú +cadascuna +cadascunes +cadascuns +com +contra +d'un +d'una +d'unes +d'uns +dalt +de +del +dels +des +després +dins +dintre +donat +doncs +durant +e +eh +el +els +em +en +encara +ens +entre 
+érem +eren +éreu +es +és +esta +està +estàvem +estaven +estàveu +esteu +et +etc +ets +fins +fora +gairebé +ha +han +has +havia +he +hem +heu +hi +ho +i +igual +iguals +ja +l'hi +la +les +li +li'n +llavors +m'he +ma +mal +malgrat +mateix +mateixa +mateixes +mateixos +me +mentre +més +meu +meus +meva +meves +molt +molta +moltes +molts +mon +mons +n'he +n'hi +ne +ni +no +nogensmenys +només +nosaltres +nostra +nostre +nostres +o +oh +oi +on +pas +pel +pels +per +però +perquè +poc +poca +pocs +poques +potser +propi +qual +quals +quan +quant +que +què +quelcom +qui +quin +quina +quines +quins +s'ha +s'han +sa +semblant +semblants +ses +seu +seus +seva +seva +seves +si +sobre +sobretot +sóc +solament +sols +son +són +sons +sota +sou +t'ha +t'han +t'he +ta +tal +també +tampoc +tan +tant +tanta +tantes +teu +teus +teva +teves +ton +tons +tot +tota +totes +tots +un +una +unes +uns +us +va +vaig +vam +van +vas +veu +vosaltres +vostra +vostre +vostres diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_cz.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_cz.txt new file mode 100644 index 0000000000..53c6097dac --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_cz.txt @@ -0,0 +1,172 @@ +a +s +k +o +i +u +v +z +dnes +cz +tímto +budeš +budem +byli +jseš +můj +svým +ta +tomto +tohle +tuto +tyto +jej +zda +proč +máte +tato +kam +tohoto +kdo +kteří +mi +nám +tom +tomuto +mít +nic +proto +kterou +byla +toho +protože +asi +ho +naši +napište +re +což +tím +takže +svých +její +svými +jste +aj +tu +tedy +teto +bylo +kde +ke +pravé +ji +nad +nejsou +či +pod +téma +mezi +přes +ty +pak +vám +ani +když +však +neg +jsem +tento +článku +články +aby +jsme +před +pta +jejich +byl +ještě +až +bez +také +pouze +první +vaše +která +nás +nový +tipy +pokud +může +strana +jeho +své +jiné +zprávy +nové +není +vás +jen +podle +zde +už +být +více +bude +již +než +který +by +které +co +nebo +ten +tak +má +při +od +po +jsou +jak +další +ale +si +se 
+ve +to +jako +za +zpět +ze +do +pro +je +na +atd +atp +jakmile +přičemž +já +on +ona +ono +oni +ony +my +vy +jí +ji +mě +mne +jemu +tomu +těm +těmu +němu +němuž +jehož +jíž +jelikož +jež +jakož +načež diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_da.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_da.txt new file mode 100644 index 0000000000..42e6145b98 --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_da.txt @@ -0,0 +1,110 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Danish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + +og | and +i | in +jeg | I +det | that (dem. pronoun)/it (pers. pronoun) +at | that (in front of a sentence)/to (with infinitive) +en | a/an +den | it (pers. pronoun)/that (dem. 
pronoun) +til | to/at/for/until/against/by/of/into, more +er | present tense of "to be" +som | who, as +på | on/upon/in/on/at/to/after/of/with/for, on +de | they +med | with/by/in, along +han | he +af | of/by/from/off/for/in/with/on, off +for | at/for/to/from/by/of/ago, in front/before, because +ikke | not +der | who/which, there/those +var | past tense of "to be" +mig | me/myself +sig | oneself/himself/herself/itself/themselves +men | but +et | a/an/one, one (number), someone/somebody/one +har | present tense of "to have" +om | round/about/for/in/a, about/around/down, if +vi | we +min | my +havde | past tense of "to have" +ham | him +hun | she +nu | now +over | over/above/across/by/beyond/past/on/about, over/past +da | then, when/as/since +fra | from/off/since, off, since +du | you +ud | out +sin | his/her/its/one's +dem | them +os | us/ourselves +op | up +man | you/one +hans | his +hvor | where +eller | or +hvad | what +skal | must/shall etc. +selv | myself/youself/herself/ourselves etc., even +her | here +alle | all/everyone/everybody etc. 
+vil | will (verb) +blev | past tense of "to stay/to remain/to get/to become" +kunne | could +ind | in +når | when +være | present tense of "to be" +dog | however/yet/after all +noget | something +ville | would +jo | you know/you see (adv), yes +deres | their/theirs +efter | after/behind/according to/for/by/from, later/afterwards +ned | down +skulle | should +denne | this +end | than +dette | this +mit | my/mine +også | also +under | under/beneath/below/during, below/underneath +have | have +dig | you +anden | other +hende | her +mine | my +alt | everything +meget | much/very, plenty of +sit | his, her, its, one's +sine | his, her, its, one's +vor | our +mod | against +disse | these +hvis | if +din | your/yours +nogle | some +hos | by/at +blive | be/become +mange | many +ad | by/through +bliver | present tense of "to be/to become" +hendes | her/hers +været | be +thi | for (conj) +jer | you +sådan | such, like this/like that diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_de.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_de.txt new file mode 100644 index 0000000000..86525e7ae0 --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_de.txt @@ -0,0 +1,294 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A German stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | The number of forms in this list is reduced significantly by passing it + | through the German stemmer. 
+ + +aber | but + +alle | all +allem +allen +aller +alles + +als | than, as +also | so +am | an + dem +an | at + +ander | other +andere +anderem +anderen +anderer +anderes +anderm +andern +anderr +anders + +auch | also +auf | on +aus | out of +bei | by +bin | am +bis | until +bist | art +da | there +damit | with it +dann | then + +der | the +den +des +dem +die +das + +daß | that + +derselbe | the same +derselben +denselben +desselben +demselben +dieselbe +dieselben +dasselbe + +dazu | to that + +dein | thy +deine +deinem +deinen +deiner +deines + +denn | because + +derer | of those +dessen | of him + +dich | thee +dir | to thee +du | thou + +dies | this +diese +diesem +diesen +dieser +dieses + + +doch | (several meanings) +dort | (over) there + + +durch | through + +ein | a +eine +einem +einen +einer +eines + +einig | some +einige +einigem +einigen +einiger +einiges + +einmal | once + +er | he +ihn | him +ihm | to him + +es | it +etwas | something + +euer | your +eure +eurem +euren +eurer +eures + +für | for +gegen | towards +gewesen | p.p. 
of sein +hab | have +habe | have +haben | have +hat | has +hatte | had +hatten | had +hier | here +hin | there +hinter | behind + +ich | I +mich | me +mir | to me + + +ihr | you, to her +ihre +ihrem +ihren +ihrer +ihres +euch | to you + +im | in + dem +in | in +indem | while +ins | in + das +ist | is + +jede | each, every +jedem +jeden +jeder +jedes + +jene | that +jenem +jenen +jener +jenes + +jetzt | now +kann | can + +kein | no +keine +keinem +keinen +keiner +keines + +können | can +könnte | could +machen | do +man | one + +manche | some, many a +manchem +manchen +mancher +manches + +mein | my +meine +meinem +meinen +meiner +meines + +mit | with +muss | must +musste | had to +nach | to(wards) +nicht | not +nichts | nothing +noch | still, yet +nun | now +nur | only +ob | whether +oder | or +ohne | without +sehr | very + +sein | his +seine +seinem +seinen +seiner +seines + +selbst | self +sich | herself + +sie | they, she +ihnen | to them + +sind | are +so | so + +solche | such +solchem +solchen +solcher +solches + +soll | shall +sollte | should +sondern | but +sonst | else +über | over +um | about, around +und | and + +uns | us +unse +unsem +unsen +unser +unses + +unter | under +viel | much +vom | von + dem +von | from +vor | before +während | while +war | was +waren | were +warst | wast +was | what +weg | away, off +weil | because +weiter | further + +welche | which +welchem +welchen +welcher +welches + +wenn | when +werde | will +werden | will +wie | how +wieder | again +will | want +wir | we +wird | will +wirst | willst +wo | where +wollen | want +wollte | wanted +würde | would +würden | would +zu | to +zum | zu + dem +zur | zu + der +zwar | indeed +zwischen | between + diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_el.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_el.txt new file mode 100644 index 0000000000..232681f5bd --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_el.txt @@ -0,0 +1,78 @@ 
+# Lucene Greek Stopwords list +# Note: by default this file is used after GreekLowerCaseFilter, +# so when modifying this file use 'σ' instead of 'ς' +ο +η +το +οι +τα +του +τησ +των +τον +την +και +κι +κ +ειμαι +εισαι +ειναι +ειμαστε +ειστε +στο +στον +στη +στην +μα +αλλα +απο +για +προσ +με +σε +ωσ +παρα +αντι +κατα +μετα +θα +να +δε +δεν +μη +μην +επι +ενω +εαν +αν +τοτε +που +πωσ +ποιοσ +ποια +ποιο +ποιοι +ποιεσ +ποιων +ποιουσ +αυτοσ +αυτη +αυτο +αυτοι +αυτων +αυτουσ +αυτεσ +αυτα +εκεινοσ +εκεινη +εκεινο +εκεινοι +εκεινεσ +εκεινα +εκεινων +εκεινουσ +οπωσ +ομωσ +ισωσ +οσο +οτι diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_en.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_en.txt new file mode 100644 index 0000000000..387d88810c --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_en.txt @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +# Standard english stop words taken from Lucene's StopAnalyzer + diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_es.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_es.txt new file mode 100644 index 0000000000..487d78c8d5 --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_es.txt @@ -0,0 +1,356 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Spanish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | from, of +la | the, her +que | who, that +el | the +en | in +y | and +a | to +los | the, them +del | de + el +se | himself, from him etc +las | the, them +por | for, by, etc +un | a +para | for +con | with +no | no +una | a +su | his, her +al | a + el + | es from SER +lo | him +como | how +más | more +pero | pero +sus | su plural +le | to him, her +ya | already +o | or + | fue from SER +este | this + | ha from HABER +sí | himself etc +porque | because +esta | this + | son from SER +entre | between + | está from ESTAR +cuando | when +muy | very +sin | without +sobre | on + | ser from SER + | tiene from TENER +también | also +me | me +hasta | until +hay | there is/are +donde | where + | han from HABER +quien | whom, that + | están from ESTAR + | estado from ESTAR +desde | from +todo | all +nos | us +durante | during + | estados from ESTAR +todos | all +uno | a +les | to them +ni | nor +contra | against +otros | other + | fueron from SER +ese | that +eso | that + | había from HABER +ante | before +ellos | they +e | and (variant of y) +esto | this +mí | me +antes | before +algunos | some +qué | what? 
+unos | a +yo | I +otro | other +otras | other +otra | other +él | he +tanto | so much, many +esa | that +estos | these +mucho | much, many +quienes | who +nada | nothing +muchos | many +cual | who + | sea from SER +poco | few +ella | she +estar | to be + | haber from HABER +estas | these + | estaba from ESTAR + | estamos from ESTAR +algunas | some +algo | something +nosotros | we + + | other forms + +mi | me +mis | mi plural +tú | thou +te | thee +ti | thee +tu | thy +tus | tu plural +ellas | they +nosotras | we +vosotros | you +vosotras | you +os | you +mío | mine +mía | +míos | +mías | +tuyo | thine +tuya | +tuyos | +tuyas | +suyo | his, hers, theirs +suya | +suyos | +suyas | +nuestro | ours +nuestra | +nuestros | +nuestras | +vuestro | yours +vuestra | +vuestros | +vuestras | +esos | those +esas | those + + | forms of estar, to be (not including the infinitive): +estoy +estás +está +estamos +estáis +están +esté +estés +estemos +estéis +estén +estaré +estarás +estará +estaremos +estaréis +estarán +estaría +estarías +estaríamos +estaríais +estarían +estaba +estabas +estábamos +estabais +estaban +estuve +estuviste +estuvo +estuvimos +estuvisteis +estuvieron +estuviera +estuvieras +estuviéramos +estuvierais +estuvieran +estuviese +estuvieses +estuviésemos +estuvieseis +estuviesen +estando +estado +estada +estados +estadas +estad + + | forms of haber, to have (not including the infinitive): +he +has +ha +hemos +habéis +han +haya +hayas +hayamos +hayáis +hayan +habré +habrás +habrá +habremos +habréis +habrán +habría +habrías +habríamos +habríais +habrían +había +habías +habíamos +habíais +habían +hube +hubiste +hubo +hubimos +hubisteis +hubieron +hubiera +hubieras +hubiéramos +hubierais +hubieran +hubiese +hubieses +hubiésemos +hubieseis +hubiesen +habiendo +habido +habida +habidos +habidas + + | forms of ser, to be (not including the infinitive): +soy +eres +es +somos +sois +son +sea +seas +seamos +seáis +sean +seré +serás +será +seremos +seréis +serán +sería 
+serías +seríamos +seríais +serían +era +eras +éramos +erais +eran +fui +fuiste +fue +fuimos +fuisteis +fueron +fuera +fueras +fuéramos +fuerais +fueran +fuese +fueses +fuésemos +fueseis +fuesen +siendo +sido + | sed also means 'thirst' + + | forms of tener, to have (not including the infinitive): +tengo +tienes +tiene +tenemos +tenéis +tienen +tenga +tengas +tengamos +tengáis +tengan +tendré +tendrás +tendrá +tendremos +tendréis +tendrán +tendría +tendrías +tendríamos +tendríais +tendrían +tenía +tenías +teníamos +teníais +tenían +tuve +tuviste +tuvo +tuvimos +tuvisteis +tuvieron +tuviera +tuvieras +tuviéramos +tuvierais +tuvieran +tuviese +tuvieses +tuviésemos +tuvieseis +tuviesen +teniendo +tenido +tenida +tenidos +tenidas +tened + diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_eu.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_eu.txt new file mode 100644 index 0000000000..25f1db9346 --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_eu.txt @@ -0,0 +1,99 @@ +# example set of basque stopwords +al +anitz +arabera +asko +baina +bat +batean +batek +bati +batzuei +batzuek +batzuetan +batzuk +bera +beraiek +berau +berauek +bere +berori +beroriek +beste +bezala +da +dago +dira +ditu +du +dute +edo +egin +ere +eta +eurak +ez +gainera +gu +gutxi +guzti +haiei +haiek +haietan +hainbeste +hala +han +handik +hango +hara +hari +hark +hartan +hau +hauei +hauek +hauetan +hemen +hemendik +hemengo +hi +hona +honek +honela +honetan +honi +hor +hori +horiei +horiek +horietan +horko +horra +horrek +horrela +horretan +horri +hortik +hura +izan +ni +noiz +nola +non +nondik +nongo +nor +nora +ze +zein +zen +zenbait +zenbat +zer +zergatik +ziren +zituen +zu +zuek +zuen +zuten diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_fa.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_fa.txt new file mode 100644 index 0000000000..723641c6da --- /dev/null +++ 
b/src/main/resources/solr/concept-sets/conf/lang/stopwords_fa.txt @@ -0,0 +1,313 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Note: by default this file is used after normalization, so when adding entries +# to this file, use the arabic 'ي' instead of 'ی' +انان +نداشته +سراسر +خياه +ايشان +وي +تاكنون +بيشتري +دوم +پس +ناشي +وگو +يا +داشتند +سپس +هنگام +هرگز +پنج +نشان +امسال +ديگر +گروهي +شدند +چطور +ده +و +دو +نخستين +ولي +چرا +چه +وسط +ه +كدام +قابل +يك +رفت +هفت +همچنين +در +هزار +بله +بلي +شايد +اما +شناسي +گرفته +دهد +داشته +دانست +داشتن +خواهيم +ميليارد +وقتيكه +امد +خواهد +جز +اورده +شده +بلكه +خدمات +شدن +برخي +نبود +بسياري +جلوگيري +حق +كردند +نوعي +بعري +نكرده +نظير +نبايد +بوده +بودن +داد +اورد +هست +جايي +شود +دنبال +داده +بايد +سابق +هيچ +همان +انجا +كمتر +كجاست +گردد +كسي +تر +مردم +تان +دادن +بودند +سري +جدا +ندارند +مگر +يكديگر +دارد +دهند +بنابراين +هنگامي +سمت +جا +انچه +خود +دادند +زياد +دارند +اثر +بدون +بهترين +بيشتر +البته +به +براساس +بيرون +كرد +بعضي +گرفت +توي +اي +ميليون +او +جريان +تول +بر +مانند +برابر +باشيم +مدتي +گويند +اكنون +تا +تنها +جديد +چند +بي +نشده +كردن +كردم +گويد +كرده +كنيم +نمي +نزد +روي +قصد +فقط +بالاي +ديگران +اين +ديروز +توسط +سوم +ايم +دانند +سوي +استفاده +شما +كنار +داريم +ساخته +طور +امده +رفته +نخست +بيست +نزديك +طي +كنيد +از +انها +تمامي +داشت +يكي +طريق +اش +چيست +روب +نمايد +گفت +چندين +چيزي +تواند +ام +ايا +با +ان +ايد +ترين +اينكه +ديگري +راه +هايي +بروز +همچنان +پاعين +كس +حدود +مختلف +مقابل +چيز +گيرد +ندارد +ضد +همچون +سازي +شان +مورد +باره +مرسي +خويش +برخوردار +چون +خارج +شش +هنوز +تحت +ضمن +هستيم +گفته +فكر +بسيار +پيش +براي +روزهاي +انكه +نخواهد +بالا +كل +وقتي +كي +چنين +كه +گيري +نيست +است +كجا +كند +نيز +يابد +بندي +حتي +توانند +عقب +خواست +كنند +بين +تمام +همه +ما +باشند +مثل +شد +اري +باشد +اره +طبق +بعد +اگر +صورت +غير +جاي +بيش 
+ريزي +اند +زيرا +چگونه +بار +لطفا +مي +درباره +من +ديده +همين +گذاري +برداري +علت +گذاشته +هم +فوق +نه +ها +شوند +اباد +همواره +هر +اول +خواهند +چهار +نام +امروز +مان +هاي +قبل +كنم +سعي +تازه +را +هستند +زير +جلوي +عنوان +بود diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_fi.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_fi.txt new file mode 100644 index 0000000000..4372c9a055 --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_fi.txt @@ -0,0 +1,97 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + +| forms of BE + +olla +olen +olet +on +olemme +olette +ovat +ole | negative form + +oli +olisi +olisit +olisin +olisimme +olisitte +olisivat +olit +olin +olimme +olitte +olivat +ollut +olleet + +en | negation +et +ei +emme +ette +eivät + +|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans +minä minun minut minua minussa minusta minuun minulla minulta minulle | I +sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you +hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she +me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we +te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you +he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they + +tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this +tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that +se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it +nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these +nuo noiden noita noissa 
noista noihin noilla noilta noille noina noiksi | those +ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they + +kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who +ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) +mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what +mitkä | (pl) + +joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which +jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) + +| conjunctions + +että | that +ja | and +jos | if +koska | because +kuin | than +mutta | but +niin | so +sekä | and +sillä | for +tai | or +vaan | but +vai | or +vaikka | although + + +| prepositions + +kanssa | with +mukaan | according to +noin | about +poikki | across +yli | over, across + +| other + +kun | when +niin | so +nyt | now +itse | self + diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_fr.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_fr.txt new file mode 100644 index 0000000000..749abae684 --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_fr.txt @@ -0,0 +1,186 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A French stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. 
+ +au | a + le +aux | a + les +avec | with +ce | this +ces | these +dans | with +de | of +des | de + les +du | de + le +elle | she +en | `of them' etc +et | and +eux | them +il | he +je | I +la | the +le | the +leur | their +lui | him +ma | my (fem) +mais | but +me | me +même | same; as in moi-même (myself) etc +mes | me (pl) +moi | me +mon | my (masc) +ne | not +nos | our (pl) +notre | our +nous | we +on | one +ou | where +par | by +pas | not +pour | for +qu | que before vowel +que | that +qui | who +sa | his, her (fem) +se | oneself +ses | his (pl) +son | his, her (masc) +sur | on +ta | thy (fem) +te | thee +tes | thy (pl) +toi | thee +ton | thy (masc) +tu | thou +un | a +une | a +vos | your (pl) +votre | your +vous | you + + | single letter forms + +c | c' +d | d' +j | j' +l | l' +à | to, at +m | m' +n | n' +s | s' +t | t' +y | there + + | forms of être (not including the infinitive): +été +étée +étées +étés +étant +suis +es +est +sommes +êtes +sont +serai +seras +sera +serons +serez +seront +serais +serait +serions +seriez +seraient +étais +était +étions +étiez +étaient +fus +fut +fûmes +fûtes +furent +sois +soit +soyons +soyez +soient +fusse +fusses +fût +fussions +fussiez +fussent + + | forms of avoir (not including the infinitive): +ayant +eu +eue +eues +eus +ai +as +avons +avez +ont +aurai +auras +aura +aurons +aurez +auront +aurais +aurait +aurions +auriez +auraient +avais +avait +avions +aviez +avaient +eut +eûmes +eûtes +eurent +aie +aies +ait +ayons +ayez +aient +eusse +eusses +eût +eussions +eussiez +eussent + + | Later additions (from Jean-Christophe Deschamps) +ceci | this +cela | that +celà | that +cet | this +cette | this +ici | here +ils | they +les | the (pl) +leurs | their (pl) +quel | which +quels | which +quelle | which +quelles | which +sans | without +soi | oneself + diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_ga.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_ga.txt new file mode 100644 index 
0000000000..9ff88d747e --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_ga.txt @@ -0,0 +1,110 @@ + +a +ach +ag +agus +an +aon +ar +arna +as +b' +ba +beirt +bhúr +caoga +ceathair +ceathrar +chomh +chtó +chuig +chun +cois +céad +cúig +cúigear +d' +daichead +dar +de +deich +deichniúr +den +dhá +do +don +dtí +dá +dár +dó +faoi +faoin +faoina +faoinár +fara +fiche +gach +gan +go +gur +haon +hocht +i +iad +idir +in +ina +ins +inár +is +le +leis +lena +lenár +m' +mar +mo +mé +na +nach +naoi +naonúr +ná +ní +níor +nó +nócha +ocht +ochtar +os +roimh +sa +seacht +seachtar +seachtó +seasca +seisear +siad +sibh +sinn +sna +sé +sí +tar +thar +thú +triúr +trí +trína +trínár +tríocha +tú +um +ár +é +éis +í +ó +ón +óna +ónár diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_gl.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_gl.txt new file mode 100644 index 0000000000..d8760b12c1 --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_gl.txt @@ -0,0 +1,161 @@ +# galican stopwords +a +aínda +alí +aquel +aquela +aquelas +aqueles +aquilo +aquí +ao +aos +as +así +á +ben +cando +che +co +coa +comigo +con +connosco +contigo +convosco +coas +cos +cun +cuns +cunha +cunhas +da +dalgunha +dalgunhas +dalgún +dalgúns +das +de +del +dela +delas +deles +desde +deste +do +dos +dun +duns +dunha +dunhas +e +el +ela +elas +eles +en +era +eran +esa +esas +ese +eses +esta +estar +estaba +está +están +este +estes +estiven +estou +eu +é +facer +foi +foron +fun +había +hai +iso +isto +la +las +lle +lles +lo +los +mais +me +meu +meus +min +miña +miñas +moi +na +nas +neste +nin +no +non +nos +nosa +nosas +noso +nosos +nós +nun +nunha +nuns +nunhas +o +os +ou +ó +ós +para +pero +pode +pois +pola +polas +polo +polos +por +que +se +senón +ser +seu +seus +sexa +sido +sobre +súa +súas +tamén +tan +te +ten +teñen +teño +ter +teu +teus +ti +tido +tiña +tiven +túa +túas +un +unha +unhas +uns +vos +vosa +vosas +voso +vosos +vós 
diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_hi.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_hi.txt new file mode 100644 index 0000000000..86286bb083 --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_hi.txt @@ -0,0 +1,235 @@ +# Also see http://www.opensource.org/licenses/bsd-license.html +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# This file was created by Jacques Savoy and is distributed under the BSD license. +# Note: by default this file also contains forms normalized by HindiNormalizer +# for spelling variation (see section below), such that it can be used whether or +# not you enable that feature. When adding additional entries to this list, +# please add the normalized form as well. +अंदर +अत +अपना +अपनी +अपने +अभी +आदि +आप +इत्यादि +इन +इनका +इन्हीं +इन्हें +इन्हों +इस +इसका +इसकी +इसके +इसमें +इसी +इसे +उन +उनका +उनकी +उनके +उनको +उन्हीं +उन्हें +उन्हों +उस +उसके +उसी +उसे +एक +एवं +एस +ऐसे +और +कई +कर +करता +करते +करना +करने +करें +कहते +कहा +का +काफ़ी +कि +कितना +किन्हें +किन्हों +किया +किर +किस +किसी +किसे +की +कुछ +कुल +के +को +कोई +कौन +कौनसा +गया +घर +जब +जहाँ +जा +जितना +जिन +जिन्हें +जिन्हों +जिस +जिसे +जीधर +जैसा +जैसे +जो +तक +तब +तरह +तिन +तिन्हें +तिन्हों +तिस +तिसे +तो +था +थी +थे +दबारा +दिया +दुसरा +दूसरे +दो +द्वारा +न +नहीं +ना +निहायत +नीचे +ने +पर +पर +पहले +पूरा +पे +फिर +बनी +बही +बहुत +बाद +बाला +बिलकुल +भी +भीतर +मगर +मानो +मे +में +यदि +यह +यहाँ +यही +या +यिह +ये +रखें +रहा +रहे +ऱ्वासा +लिए +लिये +लेकिन +व +वर्ग +वह +वह +वहाँ +वहीं +वाले +वुह +वे +वग़ैरह +संग +सकता +सकते +सबसे +सभी +साथ +साबुत +साभ +सारा +से +सो +ही +हुआ +हुई +हुए +है +हैं +हो +होता +होती +होते +होना +होने +# additional normalized forms of the above +अपनि +जेसे +होति +सभि +तिंहों +इंहों +दवारा +इसि +किंहें +थि +उंहों +ओर +जिंहें +वहिं +अभि +बनि +हि +उंहिं +उंहें +हें +वगेरह +एसे +रवासा +कोन +निचे +काफि +उसि +पुरा +भितर +हे +बहि +वहां +कोइ +यहां +जिंहों +तिंहें +किसि +कइ +यहि 
+इंहिं +जिधर +इंहें +अदि +इतयादि +हुइ +कोनसा +इसकि +दुसरे +जहां +अप +किंहों +उनकि +भि +वरग +हुअ +जेसा +नहिं diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_hu.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_hu.txt new file mode 100644 index 0000000000..37526da8aa --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_hu.txt @@ -0,0 +1,211 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + +| Hungarian stop word list +| prepared by Anna Tordai + +a +ahogy +ahol +aki +akik +akkor +alatt +által +általában +amely +amelyek +amelyekben +amelyeket +amelyet +amelynek +ami +amit +amolyan +amíg +amikor +át +abban +ahhoz +annak +arra +arról +az +azok +azon +azt +azzal +azért +aztán +azután +azonban +bár +be +belül +benne +cikk +cikkek +cikkeket +csak +de +e +eddig +egész +egy +egyes +egyetlen +egyéb +egyik +egyre +ekkor +el +elég +ellen +elő +először +előtt +első +én +éppen +ebben +ehhez +emilyen +ennek +erre +ez +ezt +ezek +ezen +ezzel +ezért +és +fel +felé +hanem +hiszen +hogy +hogyan +igen +így +illetve +ill. 
+ill +ilyen +ilyenkor +ison +ismét +itt +jó +jól +jobban +kell +kellett +keresztül +keressünk +ki +kívül +között +közül +legalább +lehet +lehetett +legyen +lenne +lenni +lesz +lett +maga +magát +majd +majd +már +más +másik +meg +még +mellett +mert +mely +melyek +mi +mit +míg +miért +milyen +mikor +minden +mindent +mindenki +mindig +mint +mintha +mivel +most +nagy +nagyobb +nagyon +ne +néha +nekem +neki +nem +néhány +nélkül +nincs +olyan +ott +össze +ő +ők +őket +pedig +persze +rá +s +saját +sem +semmi +sok +sokat +sokkal +számára +szemben +szerint +szinte +talán +tehát +teljes +tovább +továbbá +több +úgy +ugyanis +új +újabb +újra +után +utána +utolsó +vagy +vagyis +valaki +valami +valamint +való +vagyok +van +vannak +volt +voltam +voltak +voltunk +vissza +vele +viszont +volna diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_hy.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_hy.txt new file mode 100644 index 0000000000..60c1c50fbc --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_hy.txt @@ -0,0 +1,46 @@ +# example set of Armenian stopwords. 
+այդ +այլ +այն +այս +դու +դուք +եմ +են +ենք +ես +եք +է +էի +էին +էինք +էիր +էիք +էր +ըստ +թ +ի +ին +իսկ +իր +կամ +համար +հետ +հետո +մենք +մեջ +մի +ն +նա +նաև +նրա +նրանք +որ +որը +որոնք +որպես +ու +ում +պիտի +վրա +և diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_id.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_id.txt new file mode 100644 index 0000000000..4617f83a5c --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_id.txt @@ -0,0 +1,359 @@ +# from appendix D of: A Study of Stemming Effects on Information +# Retrieval in Bahasa Indonesia +ada +adanya +adalah +adapun +agak +agaknya +agar +akan +akankah +akhirnya +aku +akulah +amat +amatlah +anda +andalah +antar +diantaranya +antara +antaranya +diantara +apa +apaan +mengapa +apabila +apakah +apalagi +apatah +atau +ataukah +ataupun +bagai +bagaikan +sebagai +sebagainya +bagaimana +bagaimanapun +sebagaimana +bagaimanakah +bagi +bahkan +bahwa +bahwasanya +sebaliknya +banyak +sebanyak +beberapa +seberapa +begini +beginian +beginikah +beginilah +sebegini +begitu +begitukah +begitulah +begitupun +sebegitu +belum +belumlah +sebelum +sebelumnya +sebenarnya +berapa +berapakah +berapalah +berapapun +betulkah +sebetulnya +biasa +biasanya +bila +bilakah +bisa +bisakah +sebisanya +boleh +bolehkah +bolehlah +buat +bukan +bukankah +bukanlah +bukannya +cuma +percuma +dahulu +dalam +dan +dapat +dari +daripada +dekat +demi +demikian +demikianlah +sedemikian +dengan +depan +di +dia +dialah +dini +diri +dirinya +terdiri +dong +dulu +enggak +enggaknya +entah +entahlah +terhadap +terhadapnya +hal +hampir +hanya +hanyalah +harus +haruslah +harusnya +seharusnya +hendak +hendaklah +hendaknya +hingga +sehingga +ia +ialah +ibarat +ingin +inginkah +inginkan +ini +inikah +inilah +itu +itukah +itulah +jangan +jangankan +janganlah +jika +jikalau +juga +justru +kala +kalau +kalaulah +kalaupun +kalian +kami +kamilah +kamu +kamulah +kan +kapan +kapankah +kapanpun +dikarenakan +karena 
+karenanya +ke +kecil +kemudian +kenapa +kepada +kepadanya +ketika +seketika +khususnya +kini +kinilah +kiranya +sekiranya +kita +kitalah +kok +lagi +lagian +selagi +lah +lain +lainnya +melainkan +selaku +lalu +melalui +terlalu +lama +lamanya +selama +selama +selamanya +lebih +terlebih +bermacam +macam +semacam +maka +makanya +makin +malah +malahan +mampu +mampukah +mana +manakala +manalagi +masih +masihkah +semasih +masing +mau +maupun +semaunya +memang +mereka +merekalah +meski +meskipun +semula +mungkin +mungkinkah +nah +namun +nanti +nantinya +nyaris +oleh +olehnya +seorang +seseorang +pada +padanya +padahal +paling +sepanjang +pantas +sepantasnya +sepantasnyalah +para +pasti +pastilah +per +pernah +pula +pun +merupakan +rupanya +serupa +saat +saatnya +sesaat +saja +sajalah +saling +bersama +sama +sesama +sambil +sampai +sana +sangat +sangatlah +saya +sayalah +se +sebab +sebabnya +sebuah +tersebut +tersebutlah +sedang +sedangkan +sedikit +sedikitnya +segala +segalanya +segera +sesegera +sejak +sejenak +sekali +sekalian +sekalipun +sesekali +sekaligus +sekarang +sekarang +sekitar +sekitarnya +sela +selain +selalu +seluruh +seluruhnya +semakin +sementara +sempat +semua +semuanya +sendiri +sendirinya +seolah +seperti +sepertinya +sering +seringnya +serta +siapa +siapakah +siapapun +disini +disinilah +sini +sinilah +sesuatu +sesuatunya +suatu +sesudah +sesudahnya +sudah +sudahkah +sudahlah +supaya +tadi +tadinya +tak +tanpa +setelah +telah +tentang +tentu +tentulah +tentunya +tertentu +seterusnya +tapi +tetapi +setiap +tiap +setidaknya +tidak +tidakkah +tidaklah +toh +waduh +wah +wahai +sewaktu +walau +walaupun +wong +yaitu +yakni +yang diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_it.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_it.txt new file mode 100644 index 0000000000..1219cc773a --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_it.txt @@ -0,0 +1,303 @@ + | From 
svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | An Italian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +ad | a (to) before vowel +al | a + il +allo | a + lo +ai | a + i +agli | a + gli +all | a + l' +agl | a + gl' +alla | a + la +alle | a + le +con | with +col | con + il +coi | con + i (forms collo, cogli etc are now very rare) +da | from +dal | da + il +dallo | da + lo +dai | da + i +dagli | da + gli +dall | da + l' +dagl | da + gll' +dalla | da + la +dalle | da + le +di | of +del | di + il +dello | di + lo +dei | di + i +degli | di + gli +dell | di + l' +degl | di + gl' +della | di + la +delle | di + le +in | in +nel | in + el +nello | in + lo +nei | in + i +negli | in + gli +nell | in + l' +negl | in + gl' +nella | in + la +nelle | in + le +su | on +sul | su + il +sullo | su + lo +sui | su + i +sugli | su + gli +sull | su + l' +sugl | su + gl' +sulla | su + la +sulle | su + le +per | through, by +tra | among +contro | against +io | I +tu | thou +lui | he +lei | she +noi | we +voi | you +loro | they +mio | my +mia | +miei | +mie | +tuo | +tua | +tuoi | thy +tue | +suo | +sua | +suoi | his, her +sue | +nostro | our +nostra | +nostri | +nostre | +vostro | your +vostra | +vostri | +vostre | +mi | me +ti | thee +ci | us, there +vi | you, there +lo | him, the +la | her, the +li | them +le | them, the +gli | to him, the +ne | from there etc +il | the +un | a +uno | a +una | a +ma | but +ed | and +se | if +perché | why, because +anche | also +come | how +dov | where (as dov') +dove | where +che | who, that +chi | who +cui | whom +non | not +più | more +quale | who, that 
+quanto | how much +quanti | +quanta | +quante | +quello | that +quelli | +quella | +quelle | +questo | this +questi | +questa | +queste | +si | yes +tutto | all +tutti | all + + | single letter forms: + +a | at +c | as c' for ce or ci +e | and +i | the +l | as l' +o | or + + | forms of avere, to have (not including the infinitive): + +ho +hai +ha +abbiamo +avete +hanno +abbia +abbiate +abbiano +avrò +avrai +avrà +avremo +avrete +avranno +avrei +avresti +avrebbe +avremmo +avreste +avrebbero +avevo +avevi +aveva +avevamo +avevate +avevano +ebbi +avesti +ebbe +avemmo +aveste +ebbero +avessi +avesse +avessimo +avessero +avendo +avuto +avuta +avuti +avute + + | forms of essere, to be (not including the infinitive): +sono +sei +è +siamo +siete +sia +siate +siano +sarò +sarai +sarà +saremo +sarete +saranno +sarei +saresti +sarebbe +saremmo +sareste +sarebbero +ero +eri +era +eravamo +eravate +erano +fui +fosti +fu +fummo +foste +furono +fossi +fosse +fossimo +fossero +essendo + + | forms of fare, to do (not including the infinitive, fa, fat-): +faccio +fai +facciamo +fanno +faccia +facciate +facciano +farò +farai +farà +faremo +farete +faranno +farei +faresti +farebbe +faremmo +fareste +farebbero +facevo +facevi +faceva +facevamo +facevate +facevano +feci +facesti +fece +facemmo +faceste +fecero +facessi +facesse +facessimo +facessero +facendo + + | forms of stare, to be (not including the infinitive): +sto +stai +sta +stiamo +stanno +stia +stiate +stiano +starò +starai +starà +staremo +starete +staranno +starei +staresti +starebbe +staremmo +stareste +starebbero +stavo +stavi +stava +stavamo +stavate +stavano +stetti +stesti +stette +stemmo +steste +stettero +stessi +stesse +stessimo +stessero +stando diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_ja.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_ja.txt new file mode 100644 index 0000000000..d4321be6b1 --- /dev/null +++ 
b/src/main/resources/solr/concept-sets/conf/lang/stopwords_ja.txt @@ -0,0 +1,127 @@ +# +# This file defines a stopword set for Japanese. +# +# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. +# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 +# for frequency lists, etc. that can be useful for making your own set (if desired) +# +# Note that there is an overlap between these stopwords and the terms stopped when used +# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note +# that comments are not allowed on the same line as stopwords. +# +# Also note that stopping is done in a case-insensitive manner. Change your StopFilter +# configuration if you need case-sensitive stopping. Lastly, note that stopping is done +# using the same character width as the entries in this file. Since this StopFilter is +# normally done after a CJKWidthFilter in your chain, you would usually want your romaji +# entries to be in half-width and your kana entries to be in full-width. 
+# +の +に +は +を +た +が +で +て +と +し +れ +さ +ある +いる +も +する +から +な +こと +として +い +や +れる +など +なっ +ない +この +ため +その +あっ +よう +また +もの +という +あり +まで +られ +なる +へ +か +だ +これ +によって +により +おり +より +による +ず +なり +られる +において +ば +なかっ +なく +しかし +について +せ +だっ +その後 +できる +それ +う +ので +なお +のみ +でき +き +つ +における +および +いう +さらに +でも +ら +たり +その他 +に関する +たち +ます +ん +なら +に対して +特に +せる +及び +これら +とき +では +にて +ほか +ながら +うち +そして +とともに +ただし +かつて +それぞれ +または +お +ほど +ものの +に対する +ほとんど +と共に +といった +です +とも +ところ +ここ +##### End of file diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_lv.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_lv.txt new file mode 100644 index 0000000000..e21a23c06c --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_lv.txt @@ -0,0 +1,172 @@ +# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins +# the original list of over 800 forms was refined: +# pronouns, adverbs, interjections were removed +# +# prepositions +aiz +ap +ar +apakš +ārpus +augšpus +bez +caur +dēļ +gar +iekš +iz +kopš +labad +lejpus +līdz +no +otrpus +pa +par +pār +pēc +pie +pirms +pret +priekš +starp +šaipus +uz +viņpus +virs +virspus +zem +apakšpus +# Conjunctions +un +bet +jo +ja +ka +lai +tomēr +tikko +turpretī +arī +kaut +gan +tādēļ +tā +ne +tikvien +vien +kā +ir +te +vai +kamēr +# Particles +ar +diezin +droši +diemžēl +nebūt +ik +it +taču +nu +pat +tiklab +iekšpus +nedz +tik +nevis +turpretim +jeb +iekam +iekām +iekāms +kolīdz +līdzko +tiklīdz +jebšu +tālab +tāpēc +nekā +itin +jā +jau +jel +nē +nezin +tad +tikai +vis +tak +iekams +vien +# modal verbs +būt +biju +biji +bija +bijām +bijāt +esmu +esi +esam +esat +būšu +būsi +būs +būsim +būsiet +tikt +tiku +tiki +tika +tikām +tikāt +tieku +tiec +tiek +tiekam +tiekat +tikšu +tiks +tiksim +tiksiet +tapt +tapi +tapāt +topat +tapšu +tapsi +taps +tapsim +tapsiet +kļūt +kļuvu +kļuvi +kļuva +kļuvām +kļuvāt +kļūstu +kļūsti +kļūst +kļūstam +kļūstat +kļūšu +kļūsi +kļūs +kļūsim +kļūsiet +# verbs +varēt +varēju 
+varējām +varēšu +varēsim +var +varēji +varējāt +varēsi +varēsiet +varat +varēja +varēs diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_nl.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_nl.txt new file mode 100644 index 0000000000..47a2aeacf6 --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_nl.txt @@ -0,0 +1,119 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Dutch stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large sample of Dutch text. + + | Dutch stop words frequently exhibit homonym clashes. These are indicated + | clearly below. + +de | the +en | and +van | of, from +ik | I, the ego +te | (1) chez, at etc, (2) to, (3) too +dat | that, which +die | that, those, who, which +in | in, inside +een | a, an, one +hij | he +het | the, it +niet | not, nothing, naught +zijn | (1) to be, being, (2) his, one's, its +is | is +was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river +op | on, upon, at, in, up, used up +aan | on, upon, to (as dative) +met | with, by +als | like, such as, when +voor | (1) before, in front of, (2) furrow +had | had, past tense all persons sing. of 'hebben' (have) +er | there +maar | but, only +om | round, about, for etc +hem | him +dan | then +zou | should/would, past tense all persons sing. 
of 'zullen' +of | or, whether, if +wat | what, something, anything +mijn | possessive and noun 'mine' +men | people, 'one' +dit | this +zo | so, thus, in this way +door | through by +over | over, across +ze | she, her, they, them +zich | oneself +bij | (1) a bee, (2) by, near, at +ook | also, too +tot | till, until +je | you +mij | me +uit | out of, from +der | Old Dutch form of 'van der' still found in surnames +daar | (1) there, (2) because +haar | (1) her, their, them, (2) hair +naar | (1) unpleasant, unwell etc, (2) towards, (3) as +heb | present first person sing. of 'to have' +hoe | how, why +heeft | present third person sing. of 'to have' +hebben | 'to have' and various parts thereof +deze | this +u | you +want | (1) for, (2) mitten, (3) rigging +nog | yet, still +zal | 'shall', first and third person sing. of verb 'zullen' (will) +me | me +zij | she, they +nu | now +ge | 'thou', still used in Belgium and south Netherlands +geen | none +omdat | because +iets | something, somewhat +worden | to become, grow, get +toch | yet, still +al | all, every, each +waren | (1) 'were' (2) to wander, (3) wares, (3) +veel | much, many +meer | (1) more, (2) lake +doen | to do, to make +toen | then, when +moet | noun 'spot/mote' and present form of 'to must' +ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' +zonder | without +kan | noun 'can' and present form of 'to be able' +hun | their, them +dus | so, consequently +alles | all, everything, anything +onder | under, beneath +ja | yes, of course +eens | once, one day +hier | here +wie | who +werd | imperfect third person sing. of 'become' +altijd | always +doch | yet, but etc +wordt | present third person sing. 
of 'become' +wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans +kunnen | to be able +ons | us/our +zelf | self +tegen | against, towards, at +na | after, near +reeds | already +wil | (1) present tense of 'want', (2) 'will', noun, (3) fender +kon | could; past tense of 'to be able' +niets | nothing +uw | your +iemand | somebody +geweest | been; past participle of 'be' +andere | other diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_no.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_no.txt new file mode 100644 index 0000000000..a7a2c28ba5 --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_no.txt @@ -0,0 +1,194 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Norwegian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This stop word list is for the dominant bokmål dialect. Words unique + | to nynorsk are marked *. + + | Revised by Jan Bruusgaard , Jan 2005 + +og | and +i | in +jeg | I +det | it/this/that +at | to (w. inf.) 
+en | a/an +et | a/an +den | it/this/that +til | to +er | is/am/are +som | who/that +på | on +de | they / you(formal) +med | with +han | he +av | of +ikke | not +ikkje | not * +der | there +så | so +var | was/were +meg | me +seg | you +men | but +ett | one +har | have +om | about +vi | we +min | my +mitt | my +ha | have +hadde | had +hun | she +nå | now +over | over +da | when/as +ved | by/know +fra | from +du | you +ut | out +sin | your +dem | them +oss | us +opp | up +man | you/one +kan | can +hans | his +hvor | where +eller | or +hva | what +skal | shall/must +selv | self (reflective) +sjøl | self (reflective) +her | here +alle | all +vil | will +bli | become +ble | became +blei | became * +blitt | have become +kunne | could +inn | in +når | when +være | be +kom | come +noen | some +noe | some +ville | would +dere | you +som | who/which/that +deres | their/theirs +kun | only/just +ja | yes +etter | after +ned | down +skulle | should +denne | this +for | for/because +deg | you +si | hers/his +sine | hers/his +sitt | hers/his +mot | against +å | to +meget | much +hvorfor | why +dette | this +disse | these/those +uten | without +hvordan | how +ingen | none +din | your +ditt | your +blir | become +samme | same +hvilken | which +hvilke | which (plural) +sånn | such a +inni | inside/within +mellom | between +vår | our +hver | each +hvem | who +vors | us/ours +hvis | whose +både | both +bare | only/just +enn | than +fordi | as/because +før | before +mange | many +også | also +slik | just +vært | been +være | to be +båe | both * +begge | both +siden | since +dykk | your * +dykkar | yours * +dei | they * +deira | them * +deires | theirs * +deim | them * +di | your (fem.) 
* +då | as/when * +eg | I * +ein | a/an * +eit | a/an * +eitt | a/an * +elles | or * +honom | he * +hjå | at * +ho | she * +hoe | she * +henne | her +hennar | her/hers +hennes | hers +hoss | how * +hossen | how * +ikkje | not * +ingi | noone * +inkje | noone * +korleis | how * +korso | how * +kva | what/which * +kvar | where * +kvarhelst | where * +kven | who/whom * +kvi | why * +kvifor | why * +me | we * +medan | while * +mi | my * +mine | my * +mykje | much * +no | now * +nokon | some (masc./neut.) * +noka | some (fem.) * +nokor | some * +noko | some * +nokre | some * +si | his/hers * +sia | since * +sidan | since * +so | so * +somt | some * +somme | some * +um | about* +upp | up * +vere | be * +vore | was * +verte | become * +vort | become * +varte | became * +vart | became * + diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_pt.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_pt.txt new file mode 100644 index 0000000000..acfeb01af6 --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_pt.txt @@ -0,0 +1,253 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Portuguese stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. 
+ +de | of, from +a | the; to, at; her +o | the; him +que | who, that +e | and +do | de + o +da | de + a +em | in +um | a +para | for + | é from SER +com | with +não | not, no +uma | a +os | the; them +no | em + o +se | himself etc +na | em + a +por | for +mais | more +as | the; them +dos | de + os +como | as, like +mas | but + | foi from SER +ao | a + o +ele | he +das | de + as + | tem from TER +à | a + a +seu | his +sua | her +ou | or + | ser from SER +quando | when +muito | much + | há from HAV +nos | em + os; us +já | already, now + | está from EST +eu | I +também | also +só | only, just +pelo | per + o +pela | per + a +até | up to +isso | that +ela | he +entre | between + | era from SER +depois | after +sem | without +mesmo | same +aos | a + os + | ter from TER +seus | his +quem | whom +nas | em + as +me | me +esse | that +eles | they + | estão from EST +você | you + | tinha from TER + | foram from SER +essa | that +num | em + um +nem | nor +suas | her +meu | my +às | a + as +minha | my + | têm from TER +numa | em + uma +pelos | per + os +elas | they + | havia from HAV + | seja from SER +qual | which + | será from SER +nós | we + | tenho from TER +lhe | to him, her +deles | of them +essas | those +esses | those +pelas | per + as +este | this + | fosse from SER +dele | of him + + | other words. There are many contractions such as naquele = em+aquele, + | mo = me+o, but they are rare. + | Indefinite article plural forms are also rare. 
+ +tu | thou +te | thee +vocês | you (plural) +vos | you +lhes | to them +meus | my +minhas +teu | thy +tua +teus +tuas +nosso | our +nossa +nossos +nossas + +dela | of her +delas | of them + +esta | this +estes | these +estas | these +aquele | that +aquela | that +aqueles | those +aquelas | those +isto | this +aquilo | that + + | forms of estar, to be (not including the infinitive): +estou +está +estamos +estão +estive +esteve +estivemos +estiveram +estava +estávamos +estavam +estivera +estivéramos +esteja +estejamos +estejam +estivesse +estivéssemos +estivessem +estiver +estivermos +estiverem + + | forms of haver, to have (not including the infinitive): +hei +há +havemos +hão +houve +houvemos +houveram +houvera +houvéramos +haja +hajamos +hajam +houvesse +houvéssemos +houvessem +houver +houvermos +houverem +houverei +houverá +houveremos +houverão +houveria +houveríamos +houveriam + + | forms of ser, to be (not including the infinitive): +sou +somos +são +era +éramos +eram +fui +foi +fomos +foram +fora +fôramos +seja +sejamos +sejam +fosse +fôssemos +fossem +for +formos +forem +serei +será +seremos +serão +seria +seríamos +seriam + + | forms of ter, to have (not including the infinitive): +tenho +tem +temos +tém +tinha +tínhamos +tinham +tive +teve +tivemos +tiveram +tivera +tivéramos +tenha +tenhamos +tenham +tivesse +tivéssemos +tivessem +tiver +tivermos +tiverem +terei +terá +teremos +terão +teria +teríamos +teriam diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_ro.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_ro.txt new file mode 100644 index 0000000000..4fdee90a5b --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_ro.txt @@ -0,0 +1,233 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +acea +aceasta +această +aceea +acei +aceia +acel +acela +acele +acelea +acest +acesta +aceste +acestea +aceşti +aceştia +acolo +acum +ai +aia +aibă +aici +al +ăla +ale +alea +ălea +altceva +altcineva +am +ar +are +aş +aşadar +asemenea +asta +ăsta +astăzi +astea +ăstea +ăştia +asupra +aţi +au +avea +avem +aveţi +azi +bine +bucur +bună +ca +că +căci +când +care +cărei +căror +cărui +cât +câte +câţi +către +câtva +ce +cel +ceva +chiar +cînd +cine +cineva +cît +cîte +cîţi +cîtva +contra +cu +cum +cumva +curând +curînd +da +dă +dacă +dar +datorită +de +deci +deja +deoarece +departe +deşi +din +dinaintea +dintr +dintre +drept +după +ea +ei +el +ele +eram +este +eşti +eu +face +fără +fi +fie +fiecare +fii +fim +fiţi +iar +ieri +îi +îl +îmi +împotriva +în +înainte +înaintea +încât +încît +încotro +între +întrucât +întrucît +îţi +la +lângă +le +li +lîngă +lor +lui +mă +mâine +mea +mei +mele +mereu +meu +mi +mine +mult +multă +mulţi +ne +nicăieri +nici +nimeni +nişte +noastră +noastre +noi +noştri +nostru +nu +ori +oricând +oricare +oricât +orice +oricînd +oricine +oricît +oricum +oriunde +până +pe +pentru +peste +pînă +poate +pot +prea +prima +primul +prin +printr +sa +să +săi +sale +sau +său +se +şi +sînt +sîntem +sînteţi +spre +sub +sunt +suntem +sunteţi +ta +tăi +tale +tău +te +ţi +ţie +tine +toată +toate +tot +toţi +totuşi +tu +un +una +unde +undeva +unei +unele +uneori +unor +vă +vi +voastră +voastre +voi +voştri +vostru +vouă +vreo +vreun diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_ru.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_ru.txt new file mode 100644 index 0000000000..55271400c6 --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_ru.txt @@ -0,0 +1,243 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt + | This file is distributed under the BSD License. 
+ | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | a russian stop word list. comments begin with vertical bar. each stop + | word is at the start of a line. + + | this is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | letter `ё' is translated to `е'. + +и | and +в | in/into +во | alternative form +не | not +что | what/that +он | he +на | on/onto +я | i +с | from +со | alternative form +как | how +а | milder form of `no' (but) +то | conjunction and form of `that' +все | all +она | she +так | so, thus +его | him +но | but +да | yes/and +ты | thou +к | towards, by +у | around, chez +же | intensifier particle +вы | you +за | beyond, behind +бы | conditional/subj. particle +по | up to, along +только | only +ее | her +мне | to me +было | it was +вот | here is/are, particle +от | away from +меня | me +еще | still, yet, more +нет | no, there isnt/arent +о | about +из | out of +ему | to him +теперь | now +когда | when +даже | even +ну | so, well +вдруг | suddenly +ли | interrogative particle +если | if +уже | already, but homonym of `narrower' +или | or +ни | neither +быть | to be +был | he was +него | prepositional form of его +до | up to +вас | you accusative +нибудь | indef. 
suffix preceded by hyphen +опять | again +уж | already, but homonym of `adder' +вам | to you +сказал | he said +ведь | particle `after all' +там | there +потом | then +себя | oneself +ничего | nothing +ей | to her +может | usually with `быть' as `maybe' +они | they +тут | here +где | where +есть | there is/are +надо | got to, must +ней | prepositional form of ей +для | for +мы | we +тебя | thee +их | them, their +чем | than +была | she was +сам | self +чтоб | in order to +без | without +будто | as if +человек | man, person, one +чего | genitive form of `what' +раз | once +тоже | also +себе | to oneself +под | beneath +жизнь | life +будет | will be +ж | short form of intensifer particle `же' +тогда | then +кто | who +этот | this +говорил | was saying +того | genitive form of `that' +потому | for that reason +этого | genitive form of `this' +какой | which +совсем | altogether +ним | prepositional form of `его', `они' +здесь | here +этом | prepositional form of `этот' +один | one +почти | almost +мой | my +тем | instrumental/dative plural of `тот', `то' +чтобы | full form of `in order that' +нее | her (acc.) +кажется | it seems +сейчас | now +были | they were +куда | where to +зачем | why +сказать | to say +всех | all (acc., gen. preposn. plural) +никогда | never +сегодня | today +можно | possible, one can +при | by +наконец | finally +два | two +об | alternative form of `о', about +другой | another +хоть | even +после | after +над | above +больше | more +тот | that one (masc.) +через | across, in +эти | these +нас | us +про | about +всего | in all, only, of all +них | prepositional form of `они' (they) +какая | which, feminine +много | lots +разве | interrogative particle +сказала | she said +три | three +эту | this, acc. fem. sing. +моя | my, feminine +впрочем | moreover, besides +хорошо | good +свою | ones own, acc. fem. sing. +этой | oblique form of `эта', fem. `this' +перед | in front of +иногда | sometimes +лучше | better +чуть | a little +том | preposn. 
form of `that one' +нельзя | one must not +такой | such a one +им | to them +более | more +всегда | always +конечно | of course +всю | acc. fem. sing of `all' +между | between + + + | b: some paradigms + | + | personal pronouns + | + | я меня мне мной [мною] + | ты тебя тебе тобой [тобою] + | он его ему им [него, нему, ним] + | она ее эи ею [нее, нэи, нею] + | оно его ему им [него, нему, ним] + | + | мы нас нам нами + | вы вас вам вами + | они их им ими [них, ним, ними] + | + | себя себе собой [собою] + | + | demonstrative pronouns: этот (this), тот (that) + | + | этот эта это эти + | этого эты это эти + | этого этой этого этих + | этому этой этому этим + | этим этой этим [этою] этими + | этом этой этом этих + | + | тот та то те + | того ту то те + | того той того тех + | тому той тому тем + | тем той тем [тою] теми + | том той том тех + | + | determinative pronouns + | + | (a) весь (all) + | + | весь вся все все + | всего всю все все + | всего всей всего всех + | всему всей всему всем + | всем всей всем [всею] всеми + | всем всей всем всех + | + | (b) сам (himself etc) + | + | сам сама само сами + | самого саму само самих + | самого самой самого самих + | самому самой самому самим + | самим самой самим [самою] самими + | самом самой самом самих + | + | stems of verbs `to be', `to have', `to do' and modal + | + | быть бы буд быв есть суть + | име + | дел + | мог мож мочь + | уме + | хоч хот + | долж + | можн + | нужн + | нельзя + diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_sv.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_sv.txt new file mode 100644 index 0000000000..096f87f676 --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_sv.txt @@ -0,0 +1,133 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt + | This file is distributed under the BSD License. 
+ | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Swedish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | Swedish stop words occasionally exhibit homonym clashes. For example + | så = so, but also seed. These are indicated clearly below. + +och | and +det | it, this/that +att | to (with infinitive) +i | in, at +en | a +jag | I +hon | she +som | who, that +han | he +på | on +den | it, this/that +med | with +var | where, each +sig | him(self) etc +för | for +så | so (also: seed) +till | to +är | is +men | but +ett | a +om | if; around, about +hade | had +de | they, these/those +av | of +icke | not, no +mig | me +du | you +henne | her +då | then, when +sin | his +nu | now +har | have +inte | inte någon = no one +hans | his +honom | him +skulle | 'sake' +hennes | her +där | there +min | my +man | one (pronoun) +ej | nor +vid | at, by, on (also: vast) +kunde | could +något | some etc +från | from, off +ut | out +när | when +efter | after, behind +upp | up +vi | we +dem | them +vara | be +vad | what +över | over +än | than +dig | you +kan | can +sina | his +här | here +ha | have +mot | towards +alla | all +under | under (also: wonder) +någon | some etc +eller | or (else) +allt | all +mycket | much +sedan | since +ju | why +denna | this/that +själv | myself, yourself etc +detta | this/that +åt | to +utan | without +varit | was +hur | how +ingen | no +mitt | my +ni | you +bli | to be, become +blev | from bli +oss | us +din | thy +dessa | these/those +några | some etc +deras | their +blir | from bli +mina | my +samma | (the) same +vilken | who, that +er | you, your +sådan | such a +vår | our 
+blivit | from bli +dess | its +inom | within +mellan | between +sådant | such a +varför | why +varje | each +vilka | who, that +ditt | thy +vem | who +vilket | who, that +sitta | his +sådana | such a +vart | each +dina | thy +vars | whose +vårt | our +våra | our +ert | your +era | your +vilkas | whose + diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_th.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_th.txt new file mode 100644 index 0000000000..07f0fabe69 --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_th.txt @@ -0,0 +1,119 @@ +# Thai stopwords from: +# "Opinion Detection in Thai Political News Columns +# Based on Subjectivity Analysis" +# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak +ไว้ +ไม่ +ไป +ได้ +ให้ +ใน +โดย +แห่ง +แล้ว +และ +แรก +แบบ +แต่ +เอง +เห็น +เลย +เริ่ม +เรา +เมื่อ +เพื่อ +เพราะ +เป็นการ +เป็น +เปิดเผย +เปิด +เนื่องจาก +เดียวกัน +เดียว +เช่น +เฉพาะ +เคย +เข้า +เขา +อีก +อาจ +อะไร +ออก +อย่าง +อยู่ +อยาก +หาก +หลาย +หลังจาก +หลัง +หรือ +หนึ่ง +ส่วน +ส่ง +สุด +สําหรับ +ว่า +วัน +ลง +ร่วม +ราย +รับ +ระหว่าง +รวม +ยัง +มี +มาก +มา +พร้อม +พบ +ผ่าน +ผล +บาง +น่า +นี้ +นํา +นั้น +นัก +นอกจาก +ทุก +ที่สุด +ที่ +ทําให้ +ทํา +ทาง +ทั้งนี้ +ทั้ง +ถ้า +ถูก +ถึง +ต้อง +ต่างๆ +ต่าง +ต่อ +ตาม +ตั้งแต่ +ตั้ง +ด้าน +ด้วย +ดัง +ซึ่ง +ช่วง +จึง +จาก +จัด +จะ +คือ +ความ +ครั้ง +คง +ขึ้น +ของ +ขอ +ขณะ +ก่อน +ก็ +การ +กับ +กัน +กว่า +กล่าว diff --git a/src/main/resources/solr/concept-sets/conf/lang/stopwords_tr.txt b/src/main/resources/solr/concept-sets/conf/lang/stopwords_tr.txt new file mode 100644 index 0000000000..84d9408d4e --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/stopwords_tr.txt @@ -0,0 +1,212 @@ +# Turkish stopwords from LUCENE-559 +# merged with the list from "Information Retrieval on Turkish Texts" +# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) +acaba +altmış +altı +ama +ancak +arada +aslında +ayrıca +bana +bazı +belki +ben 
+benden +beni +benim +beri +beş +bile +bin +bir +birçok +biri +birkaç +birkez +birşey +birşeyi +biz +bize +bizden +bizi +bizim +böyle +böylece +bu +buna +bunda +bundan +bunlar +bunları +bunların +bunu +bunun +burada +çok +çünkü +da +daha +dahi +de +defa +değil +diğer +diye +doksan +dokuz +dolayı +dolayısıyla +dört +edecek +eden +ederek +edilecek +ediliyor +edilmesi +ediyor +eğer +elli +en +etmesi +etti +ettiği +ettiğini +gibi +göre +halen +hangi +hatta +hem +henüz +hep +hepsi +her +herhangi +herkesin +hiç +hiçbir +için +iki +ile +ilgili +ise +işte +itibaren +itibariyle +kadar +karşın +katrilyon +kendi +kendilerine +kendini +kendisi +kendisine +kendisini +kez +ki +kim +kimden +kime +kimi +kimse +kırk +milyar +milyon +mu +mü +mı +nasıl +ne +neden +nedenle +nerde +nerede +nereye +niye +niçin +o +olan +olarak +oldu +olduğu +olduğunu +olduklarını +olmadı +olmadığı +olmak +olması +olmayan +olmaz +olsa +olsun +olup +olur +olursa +oluyor +on +ona +ondan +onlar +onlardan +onları +onların +onu +onun +otuz +oysa +öyle +pek +rağmen +sadece +sanki +sekiz +seksen +sen +senden +seni +senin +siz +sizden +sizi +sizin +şey +şeyden +şeyi +şeyler +şöyle +şu +şuna +şunda +şundan +şunları +şunu +tarafından +trilyon +tüm +üç +üzere +var +vardı +ve +veya +ya +yani +yapacak +yapılan +yapılması +yapıyor +yapmak +yaptı +yaptığı +yaptığını +yaptıkları +yedi +yerine +yetmiş +yine +yirmi +yoksa +yüz +zaten diff --git a/src/main/resources/solr/concept-sets/conf/lang/userdict_ja.txt b/src/main/resources/solr/concept-sets/conf/lang/userdict_ja.txt new file mode 100644 index 0000000000..6f0368e4d8 --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/lang/userdict_ja.txt @@ -0,0 +1,29 @@ +# +# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) +# +# Add entries to this file in order to override the statistical model in terms +# of segmentation, readings and part-of-speech tags. Notice that entries do +# not have weights since they are always used when found. 
This is by-design +# in order to maximize ease-of-use. +# +# Entries are defined using the following CSV format: +# , ... , ... , +# +# Notice that a single half-width space separates tokens and readings, and +# that the number tokens and readings must match exactly. +# +# Also notice that multiple entries with the same is undefined. +# +# Whitespace only lines are ignored. Comments are not allowed on entry lines. +# + +# Custom segmentation for kanji compounds +日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 +関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 + +# Custom segmentation for compound katakana +トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 +ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 + +# Custom reading for former sumo wrestler +朝青龍,朝青龍,アサショウリュウ,カスタム人名 diff --git a/src/main/resources/solr/concept-sets/conf/managed-schema b/src/main/resources/solr/concept-sets/conf/managed-schema new file mode 100644 index 0000000000..6345c78c40 --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/managed-schema @@ -0,0 +1,985 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/main/resources/solr/concept-sets/conf/params.json b/src/main/resources/solr/concept-sets/conf/params.json new file mode 100644 index 0000000000..06114ef257 --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/params.json @@ -0,0 +1,20 @@ +{"params":{ + "query":{ + "defType":"edismax", + "q.alt":"*:*", + "rows":"10", + "fl":"*,score", + "":{"v":0} + }, + "facets":{ + "facet":"on", + "facet.mincount": "1", + "":{"v":0} + }, + "velocity":{ + "wt": "velocity", + "v.template":"browse", + "v.layout": "layout", + "":{"v":0} + } +}} \ No newline at end of file diff --git a/src/main/resources/solr/concept-sets/conf/protwords.txt b/src/main/resources/solr/concept-sets/conf/protwords.txt new file mode 100644 index 0000000000..1dfc0abecb --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. + +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. +dontstems +zwhacky + diff --git a/src/main/resources/solr/concept-sets/conf/solrconfig.xml b/src/main/resources/solr/concept-sets/conf/solrconfig.xml new file mode 100644 index 0000000000..caaefd7ba3 --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/solrconfig.xml @@ -0,0 +1,1357 @@ + + + + + + + + + 7.4.0 + + + + + + + + + + + + + ${solr.data.dir:} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.lock.type:native} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.ulog.dir:} + ${solr.ulog.numVersionBuckets:65536} + + + + + ${solr.autoCommit.maxTime:15000} + false + + + + + + ${solr.autoSoftCommit.maxTime:-1} + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + + + + + false + + + + + + + + + + + + + + + + + + + + + + explicit + 10 + + + + + + + + + + + + + + + + explicit + json + true + + + + + + + + explicit + + + + + + _text_ + + + + + + + true + ignored_ + _text_ + + + + + + + + + text_general + + + + + + default + _text_ + solr.DirectSolrSpellChecker + + internal + + 0.5 + + 2 + + 1 + + 5 + + 4 + + 0.01 + + + + + + + + + + + + default + on + true + 10 + 5 + 5 + true + true + 10 + 5 + + + spellcheck + + + + + + + + + + true + + + tvComponent + + + + + + + + + + + + true + false + + + terms + + + + + + + + string + + + + + + explicit + + + elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + 10 + .,!? + + + + + + + WORD + + + en + US + + + + + + + + + + + + [^\w-\.] 
+ _ + + + + + + + yyyy-MM-dd'T'HH:mm:ss.SSSZ + yyyy-MM-dd'T'HH:mm:ss,SSSZ + yyyy-MM-dd'T'HH:mm:ss.SSS + yyyy-MM-dd'T'HH:mm:ss,SSS + yyyy-MM-dd'T'HH:mm:ssZ + yyyy-MM-dd'T'HH:mm:ss + yyyy-MM-dd'T'HH:mmZ + yyyy-MM-dd'T'HH:mm + yyyy-MM-dd HH:mm:ss.SSSZ + yyyy-MM-dd HH:mm:ss,SSSZ + yyyy-MM-dd HH:mm:ss.SSS + yyyy-MM-dd HH:mm:ss,SSS + yyyy-MM-dd HH:mm:ssZ + yyyy-MM-dd HH:mm:ss + yyyy-MM-dd HH:mmZ + yyyy-MM-dd HH:mm + yyyy-MM-dd + + + + + java.lang.String + text_general + + *_str + 256 + + + true + + + java.lang.Boolean + booleans + + + java.util.Date + pdates + + + java.lang.Long + java.lang.Integer + plongs + + + java.lang.Number + pdoubles + + + + + + + + + + + + + + + + + + + + + + + + + + text/plain; charset=UTF-8 + + + + + ${velocity.template.base.dir:} + ${velocity.solr.resource.loader.enabled:true} + ${velocity.params.resource.loader.enabled:false} + + + + + 5 + + + + + + + + + + + + + + diff --git a/src/main/resources/solr/concept-sets/conf/stopwords.txt b/src/main/resources/solr/concept-sets/conf/stopwords.txt new file mode 100644 index 0000000000..ae1e83eeb3 --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/stopwords.txt @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/src/main/resources/solr/concept-sets/conf/synonyms.txt b/src/main/resources/solr/concept-sets/conf/synonyms.txt new file mode 100644 index 0000000000..eab4ee8753 --- /dev/null +++ b/src/main/resources/solr/concept-sets/conf/synonyms.txt @@ -0,0 +1,29 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaafoo => aaabar +bbbfoo => bbbfoo bbbbar +cccfoo => cccbar cccbaz +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming +#after us won't split it into two words. 
+ +# Synonym mappings can be used for spelling correction too +pixima => pixma + From 6e2dcb496863f080dd5b6d5513b7f15dd87c80de Mon Sep 17 00:00:00 2001 From: Anton Abushkevich Date: Tue, 22 Nov 2022 17:18:30 +0300 Subject: [PATCH 04/27] Concept sets search - add search availability check --- .../conceptset/search/ConceptSetSearchService.java | 4 ++++ .../org/ohdsi/webapi/service/ConceptSetService.java | 13 +++++++++++++ ...031170000__add_concept_set_search_permission.sql | 4 +++- 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java index 97ab772956..48a4c4aeb8 100644 --- a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java +++ b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java @@ -29,6 +29,10 @@ public class ConceptSetSearchService { @Autowired private SolrSearchClient solrSearchClient; + public boolean isSearchAvailable() throws Exception { + return solrSearchClient.enabled() && solrSearchClient.getCores().contains(conceptSetsCore); + } + public Set searchConceptSets(final ConceptSetSearchDTO dto) { final SolrClient solrClient = solrSearchClient.getSolrClient(conceptSetsCore); diff --git a/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java b/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java index d0389502f7..de719460ba 100644 --- a/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java +++ b/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java @@ -853,6 +853,19 @@ public List listByTags(TagNameListRequestDTO requestDTO) { return listByTags(entities, names, ConceptSetDTO.class); } + /** + * Checks, if concept sets search is available. + * + * @summary Is concept sets search available. 
+ */ + @Path("/searchAvailable") + @GET + public Response isSearchAvailable() throws Exception { + return conceptSetSearchService.isSearchAvailable() + ? Response.ok().build() + : Response.status(Response.Status.NOT_IMPLEMENTED).build(); + } + /** * Search for concept sets. * diff --git a/src/main/resources/db/migration/postgresql/V2.13.0.20221031170000__add_concept_set_search_permission.sql b/src/main/resources/db/migration/postgresql/V2.13.0.20221031170000__add_concept_set_search_permission.sql index e2848f20f2..33b0acedc3 100644 --- a/src/main/resources/db/migration/postgresql/V2.13.0.20221031170000__add_concept_set_search_permission.sql +++ b/src/main/resources/db/migration/postgresql/V2.13.0.20221031170000__add_concept_set_search_permission.sql @@ -1,3 +1,5 @@ +INSERT INTO ${ohdsiSchema}.sec_permission(id, value, description) VALUES + (nextval('${ohdsiSchema}.sec_permission_id_seq'), 'conceptset:searchAvailable', 'Permission to check the availability of Concept sets search'); INSERT INTO ${ohdsiSchema}.sec_permission(id, value, description) VALUES (nextval('${ohdsiSchema}.sec_permission_id_seq'), 'conceptset:*:search', 'Concept sets search permission'); INSERT INTO ${ohdsiSchema}.sec_permission(id, value, description) VALUES @@ -6,7 +8,7 @@ INSERT INTO ${ohdsiSchema}.sec_permission(id, value, description) VALUES INSERT INTO ${ohdsiSchema}.sec_role_permission(id, role_id, permission_id) SELECT nextval('${ohdsiSchema}.sec_role_permission_sequence'), sr.id, sp.id FROM ${ohdsiSchema}.sec_permission SP, ${ohdsiSchema}.sec_role sr -WHERE sp.value = 'conceptset:*:search' AND sr.name IN ('Atlas users'); +WHERE sp.value in ('conceptset:searchAvailable', 'conceptset:*:search') AND sr.name IN ('Atlas users'); INSERT INTO ${ohdsiSchema}.sec_role_permission(id, role_id, permission_id) SELECT nextval('${ohdsiSchema}.sec_role_permission_sequence'), sr.id, sp.id From 6a49a787b6ef2c8a8352541388ed8c00b979798e Mon Sep 17 00:00:00 2001 From: Anton Abushkevich Date: Tue, 22 Nov 
2022 17:30:36 +0300 Subject: [PATCH 05/27] Concept sets search - add search availability checks to reindex --- .../search/ConceptSetSearchService.java | 9 +++- .../webapi/service/ConceptSetService.java | 42 +++++++++++-------- 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java index 48a4c4aeb8..03cfbb8cc9 100644 --- a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java +++ b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java @@ -29,8 +29,13 @@ public class ConceptSetSearchService { @Autowired private SolrSearchClient solrSearchClient; - public boolean isSearchAvailable() throws Exception { - return solrSearchClient.enabled() && solrSearchClient.getCores().contains(conceptSetsCore); + public boolean isSearchAvailable() { + try { + return solrSearchClient.enabled() && solrSearchClient.getCores().contains(conceptSetsCore); + } catch (final Exception e) { + log.error("SOLR error: Concept sets search availability check failed", e); + return false; + } } public Set searchConceptSets(final ConceptSetSearchDTO dto) { diff --git a/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java b/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java index de719460ba..07a557068b 100644 --- a/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java +++ b/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java @@ -370,21 +370,24 @@ public boolean saveConceptSetItems(@PathParam("id") final int id, ConceptSetItem getConceptSetItemRepository().save(csi); } - // Index concept set for search - final ConceptSetExport csExport = getConceptSetForExport(id, new SourceInfo(sourceService.getPriorityVocabularySource())); - - final Collection concepts = csExport.mappedConcepts.stream() - .map(item -> { - final ConceptSetSearchDocument concept = new 
ConceptSetSearchDocument(); - concept.setConceptSetId(id); - concept.setConceptId(item.conceptId); - concept.setConceptName(item.conceptName); - concept.setConceptCode(item.conceptCode); - concept.setDomainName(item.domainId); - return concept; - }).collect(Collectors.toList()); - - conceptSetSearchService.reindexConceptSet(id, concepts); + if (conceptSetSearchService.isSearchAvailable()) { + + // Index concept set for search + final ConceptSetExport csExport = getConceptSetForExport(id, new SourceInfo(sourceService.getPriorityVocabularySource())); + + final Collection concepts = csExport.mappedConcepts.stream() + .map(item -> { + final ConceptSetSearchDocument concept = new ConceptSetSearchDocument(); + concept.setConceptSetId(id); + concept.setConceptId(item.conceptId); + concept.setConceptName(item.conceptName); + concept.setConceptCode(item.conceptCode); + concept.setDomainName(item.domainId); + return concept; + }).collect(Collectors.toList()); + + conceptSetSearchService.reindexConceptSet(id, concepts); + } return true; } @@ -860,7 +863,7 @@ public List listByTags(TagNameListRequestDTO requestDTO) { */ @Path("/searchAvailable") @GET - public Response isSearchAvailable() throws Exception { + public Response isSearchAvailable() { return conceptSetSearchService.isSearchAvailable() ? 
Response.ok().build() : Response.status(Response.Status.NOT_IMPLEMENTED).build(); @@ -905,7 +908,11 @@ public Collection executeSearch(@PathParam("sourceKey") String so @GET @Produces(MediaType.APPLICATION_JSON) @Consumes(MediaType.APPLICATION_JSON) - public void fullIndex(@PathParam("sourceKey") String sourceKey) { + public Response fullIndex(@PathParam("sourceKey") String sourceKey) { + if (!conceptSetSearchService.isSearchAvailable()) { + return Response.status(Response.Status.NOT_IMPLEMENTED).build(); + } + final Collection documents = new ArrayList<>(); getConceptSetRepository().findAll().forEach(cs -> { @@ -928,6 +935,7 @@ public void fullIndex(@PathParam("sourceKey") String sourceKey) { log.info("Full reindex start"); conceptSetSearchService.indexConceptSetsFull(documents); log.info("Full reindex finish"); + return Response.ok().build(); } private void checkVersion(int id, int version) { From 5d6e747bf632c93bb44df65a1ae1381655bf4235 Mon Sep 17 00:00:00 2001 From: MakTakin Date: Tue, 22 Nov 2022 18:46:05 +0300 Subject: [PATCH 06/27] Concept sets search - add locales to new buttons --- src/main/resources/i18n/messages_en.json | 5 ++++- src/main/resources/i18n/messages_ko.json | 5 ++++- src/main/resources/i18n/messages_ru.json | 5 ++++- src/main/resources/i18n/messages_zh.json | 5 ++++- 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/main/resources/i18n/messages_en.json b/src/main/resources/i18n/messages_en.json index 828bb58db6..e36e9f379f 100644 --- a/src/main/resources/i18n/messages_en.json +++ b/src/main/resources/i18n/messages_en.json @@ -1713,7 +1713,10 @@ "tabs": { "export": "Export", "list": "List" - } + }, + "searchCS": "Search Concept Sets", + "closeSearchCS":"Close Search Concept Sets", + "clearSearch": "Clear search concept sets" }, "manager": { "attemptingToFindMessage": "Attempting to find an optimal definition for this concept set...", diff --git a/src/main/resources/i18n/messages_ko.json 
b/src/main/resources/i18n/messages_ko.json index 03a09ed81d..e75e25ddfe 100644 --- a/src/main/resources/i18n/messages_ko.json +++ b/src/main/resources/i18n/messages_ko.json @@ -1713,7 +1713,10 @@ "tabs": { "export": "내보내기", "list": "리스트" - } + }, + "searchCS": "Search Concept Sets", + "closeSearchCS":"Close Search Concept Sets", + "clearSearch": "Clear search concept sets" }, "manager": { "attemptingToFindMessage": "이 컨셉 세트에 대한 최적의 정의를 찾는 중...", diff --git a/src/main/resources/i18n/messages_ru.json b/src/main/resources/i18n/messages_ru.json index 43594f5683..dad63d448f 100644 --- a/src/main/resources/i18n/messages_ru.json +++ b/src/main/resources/i18n/messages_ru.json @@ -1829,7 +1829,10 @@ "saveMessage": "Сохранить набор концепций из списка ниже", "saveWarning": "Сохраните, пожалуйста, текущий набор концепций перед сравнением.", "saveFromComparisonNameTail": " - из сравнения" - } + }, + "searchCS": "Поиск наборов концепций", + "closeSearchCS":"Закрыть поиск наборов концепций", + "clearSearch": "Очистить поиск наборов концепций" } }, "cc": { diff --git a/src/main/resources/i18n/messages_zh.json b/src/main/resources/i18n/messages_zh.json index b2f405ad4e..ea129fcb61 100644 --- a/src/main/resources/i18n/messages_zh.json +++ b/src/main/resources/i18n/messages_zh.json @@ -1713,7 +1713,10 @@ "tabs": { "export": "导出", "list": "列表" - } + }, + "searchCS": "Search Concept Sets", + "closeSearchCS":"Close Search Concept Sets", + "clearSearch": "Clear search concept sets" }, "manager": { "attemptingToFindMessage": "正在尝试为此概念集找到最佳定义...", From b4c4decdbafd4e47a3611e506a1ae6c57efc18f8 Mon Sep 17 00:00:00 2001 From: MakTakin Date: Tue, 22 Nov 2022 18:48:23 +0300 Subject: [PATCH 07/27] Concept sets search - change key of clear search --- src/main/resources/i18n/messages_en.json | 2 +- src/main/resources/i18n/messages_ko.json | 2 +- src/main/resources/i18n/messages_ru.json | 2 +- src/main/resources/i18n/messages_zh.json | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff 
--git a/src/main/resources/i18n/messages_en.json b/src/main/resources/i18n/messages_en.json index e36e9f379f..b2c8b02e6d 100644 --- a/src/main/resources/i18n/messages_en.json +++ b/src/main/resources/i18n/messages_en.json @@ -1716,7 +1716,7 @@ }, "searchCS": "Search Concept Sets", "closeSearchCS":"Close Search Concept Sets", - "clearSearch": "Clear search concept sets" + "clearSearchCS": "Clear search concept sets" }, "manager": { "attemptingToFindMessage": "Attempting to find an optimal definition for this concept set...", diff --git a/src/main/resources/i18n/messages_ko.json b/src/main/resources/i18n/messages_ko.json index e75e25ddfe..e53b6bb7d8 100644 --- a/src/main/resources/i18n/messages_ko.json +++ b/src/main/resources/i18n/messages_ko.json @@ -1716,7 +1716,7 @@ }, "searchCS": "Search Concept Sets", "closeSearchCS":"Close Search Concept Sets", - "clearSearch": "Clear search concept sets" + "clearSearchCS": "Clear search concept sets" }, "manager": { "attemptingToFindMessage": "이 컨셉 세트에 대한 최적의 정의를 찾는 중...", diff --git a/src/main/resources/i18n/messages_ru.json b/src/main/resources/i18n/messages_ru.json index dad63d448f..22df59eb15 100644 --- a/src/main/resources/i18n/messages_ru.json +++ b/src/main/resources/i18n/messages_ru.json @@ -1832,7 +1832,7 @@ }, "searchCS": "Поиск наборов концепций", "closeSearchCS":"Закрыть поиск наборов концепций", - "clearSearch": "Очистить поиск наборов концепций" + "clearSearchCS": "Очистить поиск наборов концепций" } }, "cc": { diff --git a/src/main/resources/i18n/messages_zh.json b/src/main/resources/i18n/messages_zh.json index ea129fcb61..c2288602a3 100644 --- a/src/main/resources/i18n/messages_zh.json +++ b/src/main/resources/i18n/messages_zh.json @@ -1716,7 +1716,7 @@ }, "searchCS": "Search Concept Sets", "closeSearchCS":"Close Search Concept Sets", - "clearSearch": "Clear search concept sets" + "clearSearchCS": "Clear search concept sets" }, "manager": { "attemptingToFindMessage": "正在尝试为此概念集找到最佳定义...", From 
db83d542014364fd6e607f1e4492b18c7ed79d0c Mon Sep 17 00:00:00 2001 From: Anton Abushkevich Date: Tue, 22 Nov 2022 19:07:12 +0300 Subject: [PATCH 08/27] Concept sets search - search availability check now returns boolean --- .../java/org/ohdsi/webapi/service/ConceptSetService.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java b/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java index 07a557068b..47e070f427 100644 --- a/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java +++ b/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java @@ -863,10 +863,8 @@ public List listByTags(TagNameListRequestDTO requestDTO) { */ @Path("/searchAvailable") @GET - public Response isSearchAvailable() { - return conceptSetSearchService.isSearchAvailable() - ? Response.ok().build() - : Response.status(Response.Status.NOT_IMPLEMENTED).build(); + public boolean isSearchAvailable() { + return conceptSetSearchService.isSearchAvailable(); } /** From e1511a080672d1afa3a41ae206d465262c0a32bb Mon Sep 17 00:00:00 2001 From: Anton Abushkevich Date: Wed, 23 Nov 2022 14:33:22 +0300 Subject: [PATCH 09/27] Concept sets search - add solr conf file --- src/main/resources/solr/concept-sets/core.properties | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 src/main/resources/solr/concept-sets/core.properties diff --git a/src/main/resources/solr/concept-sets/core.properties b/src/main/resources/solr/concept-sets/core.properties new file mode 100644 index 0000000000..1d19049ba1 --- /dev/null +++ b/src/main/resources/solr/concept-sets/core.properties @@ -0,0 +1,4 @@ +name=webapi-conceptsets +config=solrconfig.xml +schema=managed-schema +dataDir=data From 6afec854782088bc8c847fce6821c99eb07620aa Mon Sep 17 00:00:00 2001 From: MakTakin Date: Wed, 23 Nov 2022 15:27:06 +0300 Subject: [PATCH 10/27] Concept sets search - add locales to configuration --- src/main/resources/i18n/messages_en.json 
| 10 +++++++--- src/main/resources/i18n/messages_ko.json | 10 +++++++--- src/main/resources/i18n/messages_ru.json | 12 ++++++++---- src/main/resources/i18n/messages_zh.json | 10 +++++++--- 4 files changed, 29 insertions(+), 13 deletions(-) diff --git a/src/main/resources/i18n/messages_en.json b/src/main/resources/i18n/messages_en.json index b2c8b02e6d..af0e8d8b76 100644 --- a/src/main/resources/i18n/messages_en.json +++ b/src/main/resources/i18n/messages_en.json @@ -2445,7 +2445,8 @@ "alerts": { "clearLocalCache": "Local Storage has been cleared. Please refresh the page to reload configuration information.", "clearServerCache": "Server cache has been cleared.", - "failUpdatePrioritySourceDaimon": "Failed to update priority source daimon" + "failUpdatePrioritySourceDaimon": "Failed to update priority source daimon", + "changeSource": "You are changing current source, we recommend you to do reindexing concept sets" }, "buttons": { "check": "check", @@ -2453,11 +2454,13 @@ "clearConfigurationCache": "Clear Configuration Cache", "clearServerCache": "Clear Server Cache", "importUsers": "Import Users from LDAP/AD", - "managePermissions": "Manage Permissions" + "managePermissions": "Manage Permissions", + "reindexCS": "Concept Sets Reindex" }, "changeSourcePriorities": "Change source priorities in:", "confirms": { - "clearServerCache": "Are you sure you want to clear the server cache?" + "clearServerCache": "Are you sure you want to clear the server cache?", + "reindexSource": "Reindexing may take a long time. 
It depends on amount and complexity of concept sets" }, "executionEngine": "Execution engine", "newSource": "New Source", @@ -2487,6 +2490,7 @@ "rolesDefinitionJson": "Roles definition JSON", "title": "Import roles" }, + "searchCS":"Search Concept Sets", "title": "Configuration", "userImport": { "findGroup": "Find Group", diff --git a/src/main/resources/i18n/messages_ko.json b/src/main/resources/i18n/messages_ko.json index e53b6bb7d8..e337feda26 100644 --- a/src/main/resources/i18n/messages_ko.json +++ b/src/main/resources/i18n/messages_ko.json @@ -2445,7 +2445,8 @@ "alerts": { "clearLocalCache": "로컬 저장소가 지워졌습니다. 구성 정보를 다시 로드하려면 페이지를 새로 고침하세요.", "clearServerCache": "서버 캐시가 지워졌습니다.", - "failUpdatePrioritySourceDaimon": "우선순위 소스(priority source) daimon을 업데이트하지 못했습니다." + "failUpdatePrioritySourceDaimon": "우선순위 소스(priority source) daimon을 업데이트하지 못했습니다.", + "changeSource": "You are changing current source, we recommend you to do reindexing concept sets" }, "buttons": { "check": "확인", @@ -2453,11 +2454,13 @@ "clearConfigurationCache": "구성 캐시 지우기", "clearServerCache": "서버 캐시 지우기", "importUsers": "LDAP/AD에서 사용자 가져오기", - "managePermissions": "사용 권한 관리" + "managePermissions": "사용 권한 관리", + "reindexCS": "Concept Sets Reindex" }, "changeSourcePriorities": "다음에서 소스 우선 순위 변경:", "confirms": { - "clearServerCache": "서버 캐시를 지우시겠습니까?" + "clearServerCache": "서버 캐시를 지우시겠습니까?", + "reindexSource": "Reindexing may take a long time. 
It depends on amount and complexity of concept sets" }, "executionEngine": "실행 엔진", "newSource": "새로운 소스", @@ -2487,6 +2490,7 @@ "rolesDefinitionJson": "역할 정의(Roles definition) JSON", "title": "역할 가져오기" }, + "searchCS":"Search Concept Sets", "title": "환경설정", "userImport": { "findGroup": "그룹 찾기", diff --git a/src/main/resources/i18n/messages_ru.json b/src/main/resources/i18n/messages_ru.json index 22df59eb15..5af6cfc359 100644 --- a/src/main/resources/i18n/messages_ru.json +++ b/src/main/resources/i18n/messages_ru.json @@ -2452,7 +2452,8 @@ "clearConfigurationCache": "Очистить кэш конфигурации", "managePermissions": "Управление разрешениями", "importUsers": "Импорт пользователей из LDAP/AD", - "clearServerCache": "Очистить серверный кэш" + "clearServerCache": "Очистить серверный кэш", + "reindexCS": "Переиндексация наборов концептов" }, "priorityOptions": { "session": "В текущей сессии", @@ -2461,10 +2462,12 @@ "alerts": { "clearLocalCache": "Локальное хранилище очищено. Пожалуйста, перезагрузите страницу, чтобы обновитьинформацию о конфигурации.", "clearServerCache": "Серверный кэш очищен", - "failUpdatePrioritySourceDaimon": "Не удалось обновить демона приоритета источника" + "failUpdatePrioritySourceDaimon": "Не удалось обновить демона приоритета источника", + "changeSource": "Вы меняете текущий источник. Мы рекомендуем вам сделать реиндексацию наборов концептов" }, "confirms": { - "clearServerCache": "Вы действительно хотите очистить серверный кэш?" + "clearServerCache": "Вы действительно хотите очистить серверный кэш?", + "reindexSource": "Переиндексация может занять много времени. 
Это зависит от количества и сложности наборов концептов" }, "viewEdit": { "source": { @@ -2676,7 +2679,8 @@ "fixJson": "Исправить JSON", "alertWarning": "Некоторые из разрешений в списке содержат идентификаторы сущностей, которые не гарантируют совпадение между исходным и целевым экземплярами Atlas.", "removeThePermissions": "Удалить разрешения" - } + }, + "searchCS":"Поиск наборов концептов" }, "feedback": { "title": "Обратная связь", diff --git a/src/main/resources/i18n/messages_zh.json b/src/main/resources/i18n/messages_zh.json index c2288602a3..8f513f4f82 100644 --- a/src/main/resources/i18n/messages_zh.json +++ b/src/main/resources/i18n/messages_zh.json @@ -2445,7 +2445,8 @@ "alerts": { "clearLocalCache": "本地存储已清除。 请刷新页面以重新加载配置信息。", "clearServerCache": "服务器缓存已清除。", - "failUpdatePrioritySourceDaimon": "无法更新优先级源守护程序" + "failUpdatePrioritySourceDaimon": "无法更新优先级源守护程序", + "changeSource": "You are changing current source, we recommend you to do reindexing concept sets" }, "buttons": { "check": "检查", @@ -2453,11 +2454,13 @@ "clearConfigurationCache": "清除配置缓存", "clearServerCache": "清除服务器缓存", "importUsers": "从LDAP / AD导入用户", - "managePermissions": "管理权限" + "managePermissions": "管理权限", + "reindexCS": "Concept Sets Reindex" }, "changeSourcePriorities": "更改源优先级:", "confirms": { - "clearServerCache": "您确定要清除服务器缓存吗?" + "clearServerCache": "您确定要清除服务器缓存吗?", + "reindexSource": "Reindexing may take a long time. 
It depends on amount and complexity of concept sets" }, "executionEngine": "执行引擎", "newSource": "新来源", @@ -2487,6 +2490,7 @@ "rolesDefinitionJson": "角色定义JSON", "title": "导入角色" }, + "searchCS":"Search Concept Sets", "title": "配置", "userImport": { "findGroup": "查找组", From 6ae0594713f45512517905df2ba9aa6ed836a6b4 Mon Sep 17 00:00:00 2001 From: Anton Abushkevich Date: Fri, 25 Nov 2022 13:36:04 +0300 Subject: [PATCH 11/27] Concept sets search - reindex and search small fixes, async reindex initial, WIP --- .../search/ConceptSetSearchService.java | 10 ++--- .../webapi/service/ConceptSetService.java | 18 +++++---- .../service/dto/ConceptSetReindexDTO.java | 40 +++++++++++++++++++ src/main/resources/i18n/messages_en.json | 4 +- 4 files changed, 56 insertions(+), 16 deletions(-) create mode 100644 src/main/java/org/ohdsi/webapi/service/dto/ConceptSetReindexDTO.java diff --git a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java index 03cfbb8cc9..4f9c67720f 100644 --- a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java +++ b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java @@ -5,7 +5,6 @@ import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.common.SolrInputDocument; -import org.ohdsi.webapi.conceptset.ConceptSet; import org.ohdsi.webapi.service.dto.ConceptSetSearchDTO; import org.ohdsi.webapi.vocabulary.SolrSearchClient; import org.slf4j.Logger; @@ -14,7 +13,6 @@ import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Service; -import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; import java.util.Set; @@ -103,13 +101,13 @@ public void deleteConceptSetIndex(final Integer conceptSetId) { } private String composeSearchQuery(final ConceptSetSearchDTO dto) { - String searchQuery = 
solrSearchClient.formatSearchQuery(dto.getQuery().trim()); + final String searchQuery = solrSearchClient.escapeNonWildcardQuery(dto.getQuery().trim()); if (dto.getDomainId() != null && dto.getDomainId().length > 0) { - searchQuery += " AND domain_name:(" + String.join(" OR ", dto.getDomainId()) + ")"; + return String.format("query:%s AND domain_name:(%s)", searchQuery, String.join(" OR ", dto.getDomainId())); + } else { + return String.format("query:%s", searchQuery); } - - return searchQuery; } private void addDocumentToIndex(final SolrClient solrClient, final ConceptSetSearchDocument searchDocument) { diff --git a/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java b/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java index 47e070f427..29e4656378 100644 --- a/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java +++ b/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java @@ -17,7 +17,6 @@ import java.io.ByteArrayOutputStream; import java.util.*; -import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.StreamSupport; import javax.transaction.Transactional; @@ -40,6 +39,7 @@ import org.ohdsi.webapi.exception.ConceptNotExistException; import org.ohdsi.webapi.security.PermissionService; import org.ohdsi.webapi.service.dto.ConceptSetDTO; +import org.ohdsi.webapi.service.dto.ConceptSetReindexDTO; import org.ohdsi.webapi.service.dto.ConceptSetSearchDTO; import org.ohdsi.webapi.shiro.Entities.UserEntity; import org.ohdsi.webapi.shiro.Entities.UserRepository; @@ -64,7 +64,6 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.core.convert.support.GenericConversionService; import org.springframework.dao.EmptyResultDataAccessException; -import org.springframework.scheduling.annotation.Async; import org.springframework.stereotype.Component; /** @@ -906,14 +905,16 @@ public Collection executeSearch(@PathParam("sourceKey") String so @GET 
@Produces(MediaType.APPLICATION_JSON) @Consumes(MediaType.APPLICATION_JSON) - public Response fullIndex(@PathParam("sourceKey") String sourceKey) { + public ConceptSetReindexDTO fullIndex(@PathParam("sourceKey") String sourceKey) { if (!conceptSetSearchService.isSearchAvailable()) { - return Response.status(Response.Status.NOT_IMPLEMENTED).build(); + return new ConceptSetReindexDTO("UNAVAILABLE", 0 , 0); } final Collection documents = new ArrayList<>(); - getConceptSetRepository().findAll().forEach(cs -> { + final Iterable conceptSets = getConceptSetRepository().findAll(); + + conceptSets.forEach(cs -> { final ConceptSetExpression csExpression = this.getConceptSetExpression(cs.getId()); final Collection concepts = vocabService.executeMappedLookup(sourceKey, csExpression); @@ -930,10 +931,11 @@ public Response fullIndex(@PathParam("sourceKey") String sourceKey) { log.info("Concept set {} added to reindex", cs.getId()); }); - log.info("Full reindex start"); + log.info("Full concept sets reindex start"); conceptSetSearchService.indexConceptSetsFull(documents); - log.info("Full reindex finish"); - return Response.ok().build(); + log.info("Full concept sets reindex finish"); + + return new ConceptSetReindexDTO("COMPLETED", 0, 0); } private void checkVersion(int id, int version) { diff --git a/src/main/java/org/ohdsi/webapi/service/dto/ConceptSetReindexDTO.java b/src/main/java/org/ohdsi/webapi/service/dto/ConceptSetReindexDTO.java new file mode 100644 index 0000000000..e800efecbb --- /dev/null +++ b/src/main/java/org/ohdsi/webapi/service/dto/ConceptSetReindexDTO.java @@ -0,0 +1,40 @@ +package org.ohdsi.webapi.service.dto; + +public class ConceptSetReindexDTO { + private String status; + private int maxCount; + private int doneCount; + + public ConceptSetReindexDTO() { + } + + public ConceptSetReindexDTO(final String status, final int maxCount, final int doneCount) { + this.status = status; + this.maxCount = maxCount; + this.doneCount = doneCount; + } + + public String 
getStatus() { + return status; + } + + public void setStatus(final String status) { + this.status = status; + } + + public int getMaxCount() { + return maxCount; + } + + public void setMaxCount(final int maxCount) { + this.maxCount = maxCount; + } + + public int getDoneCount() { + return doneCount; + } + + public void setDoneCount(final int doneCount) { + this.doneCount = doneCount; + } +} diff --git a/src/main/resources/i18n/messages_en.json b/src/main/resources/i18n/messages_en.json index af0e8d8b76..4ffdd8d900 100644 --- a/src/main/resources/i18n/messages_en.json +++ b/src/main/resources/i18n/messages_en.json @@ -2446,7 +2446,7 @@ "clearLocalCache": "Local Storage has been cleared. Please refresh the page to reload configuration information.", "clearServerCache": "Server cache has been cleared.", "failUpdatePrioritySourceDaimon": "Failed to update priority source daimon", - "changeSource": "You are changing current source, we recommend you to do reindexing concept sets" + "changeSource": "You are changing current source, we recommend you to reindex concept sets" }, "buttons": { "check": "check", @@ -2490,7 +2490,7 @@ "rolesDefinitionJson": "Roles definition JSON", "title": "Import roles" }, - "searchCS":"Search Concept Sets", + "searchCS": "Concept Sets Search", "title": "Configuration", "userImport": { "findGroup": "Find Group", From cb61e56790b5cbf52c6f1b587ae1be2ea668fce8 Mon Sep 17 00:00:00 2001 From: ssuvorov-fls Date: Mon, 28 Nov 2022 12:21:45 +0300 Subject: [PATCH 12/27] reindex was moved to spring job --- .../search/ConceptSetReindexStatus.java | 5 + .../search/ConceptSetReindexTasklet.java | 86 ++++++++++++ .../webapi/service/ConceptSetService.java | 130 ++++++++++++------ .../service/dto/ConceptSetReindexDTO.java | 33 +---- 4 files changed, 187 insertions(+), 67 deletions(-) create mode 100644 src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexStatus.java create mode 100644 
src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexTasklet.java diff --git a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexStatus.java b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexStatus.java new file mode 100644 index 0000000000..ef58e973d0 --- /dev/null +++ b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexStatus.java @@ -0,0 +1,5 @@ +package org.ohdsi.webapi.conceptset.search; + +public enum ConceptSetReindexStatus { + UNAVAILABLE, CREATED, STARTED, COMPLETED, FAILED +} \ No newline at end of file diff --git a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexTasklet.java b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexTasklet.java new file mode 100644 index 0000000000..c0a34d6f40 --- /dev/null +++ b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexTasklet.java @@ -0,0 +1,86 @@ +package org.ohdsi.webapi.conceptset.search; + +import org.ohdsi.circe.vocabulary.ConceptSetExpression; +import org.ohdsi.webapi.Constants; +import org.ohdsi.webapi.conceptset.ConceptSet; +import org.ohdsi.webapi.exception.AtlasException; +import org.ohdsi.webapi.service.ConceptSetService; +import org.ohdsi.webapi.service.VocabularyService; +import org.ohdsi.webapi.vocabulary.Concept; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.batch.core.ExitStatus; +import org.springframework.batch.core.StepContribution; +import org.springframework.batch.core.scope.context.ChunkContext; +import org.springframework.batch.core.step.tasklet.Tasklet; +import org.springframework.batch.repeat.RepeatStatus; +import org.springframework.stereotype.Service; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Map; +import java.util.stream.Collectors; + +import static org.ohdsi.webapi.Constants.Params.SOURCE_KEY; + +@Service +public class ConceptSetReindexTasklet implements Tasklet { + protected final Logger log = 
LoggerFactory.getLogger(getClass()); + + private ConceptSetSearchService conceptSetSearchService; + + private ConceptSetService conceptSetService; + + private VocabularyService vocabService; + + public ConceptSetReindexTasklet(ConceptSetSearchService conceptSetSearchService, + ConceptSetService conceptSetService, + VocabularyService vocabService) { + this.conceptSetSearchService = conceptSetSearchService; + this.conceptSetService = conceptSetService; + this.vocabService = vocabService; + } + + @Override + public RepeatStatus execute(StepContribution stepContribution, ChunkContext chunkContext) throws Exception { + try { + fullIndex(chunkContext); + stepContribution.setExitStatus(ExitStatus.COMPLETED); + } catch (final Throwable ex) { + log.error(ex.getMessage(), ex); + stepContribution.setExitStatus(new ExitStatus(Constants.FAILED, ex.getMessage())); + throw new AtlasException(ex); + } + return RepeatStatus.FINISHED; + } + + private void fullIndex(ChunkContext chunkContext) { + Map jobParams = chunkContext.getStepContext().getJobParameters(); + String sourceKey = jobParams.get(SOURCE_KEY).toString(); + + final Collection documents = new ArrayList<>(); + + final Iterable conceptSets = conceptSetService.getConceptSetRepository().findAll(); + + conceptSets.forEach(cs -> { + final ConceptSetExpression csExpression = conceptSetService.getConceptSetExpression(cs.getId()); + final Collection concepts = vocabService.executeMappedLookup(sourceKey, csExpression); + + documents.addAll(concepts.stream().map(item -> { + final ConceptSetSearchDocument concept = new ConceptSetSearchDocument(); + concept.setConceptSetId(cs.getId()); + concept.setConceptId(item.conceptId); + concept.setConceptName(item.conceptName); + concept.setConceptCode(item.conceptCode); + concept.setDomainName(item.domainId); + return concept; + }).collect(Collectors.toList())); + + log.info("Concept set {} added to reindex", cs.getId()); + }); + + log.info("Full concept sets reindex start"); + 
conceptSetSearchService.indexConceptSetsFull(documents); + log.info("Full concept sets reindex finish"); + } +} \ No newline at end of file diff --git a/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java b/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java index 29e4656378..c197995d0c 100644 --- a/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java +++ b/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java @@ -15,17 +15,9 @@ */ package org.ohdsi.webapi.service; -import java.io.ByteArrayOutputStream; -import java.util.*; -import java.util.stream.Collectors; -import java.util.stream.StreamSupport; -import javax.transaction.Transactional; -import javax.ws.rs.*; -import javax.ws.rs.core.MediaType; -import javax.ws.rs.core.Response; - import org.apache.shiro.authz.UnauthorizedException; import org.ohdsi.circe.vocabulary.ConceptSetExpression; +import org.ohdsi.webapi.Constants; import org.ohdsi.webapi.check.CheckResult; import org.ohdsi.webapi.check.checker.conceptset.ConceptSetChecker; import org.ohdsi.webapi.conceptset.ConceptSet; @@ -34,9 +26,13 @@ import org.ohdsi.webapi.conceptset.ConceptSetGenerationInfoRepository; import org.ohdsi.webapi.conceptset.ConceptSetItem; import org.ohdsi.webapi.conceptset.dto.ConceptSetVersionFullDTO; +import org.ohdsi.webapi.conceptset.search.ConceptSetReindexStatus; +import org.ohdsi.webapi.conceptset.search.ConceptSetReindexTasklet; import org.ohdsi.webapi.conceptset.search.ConceptSetSearchDocument; import org.ohdsi.webapi.conceptset.search.ConceptSetSearchService; import org.ohdsi.webapi.exception.ConceptNotExistException; +import org.ohdsi.webapi.job.JobExecutionResource; +import org.ohdsi.webapi.job.JobTemplate; import org.ohdsi.webapi.security.PermissionService; import org.ohdsi.webapi.service.dto.ConceptSetDTO; import org.ohdsi.webapi.service.dto.ConceptSetReindexDTO; @@ -50,9 +46,9 @@ import org.ohdsi.webapi.source.SourceService; import org.ohdsi.webapi.tag.domain.HasTags; import 
org.ohdsi.webapi.tag.dto.TagNameListRequestDTO; +import org.ohdsi.webapi.util.ExceptionUtils; import org.ohdsi.webapi.util.ExportUtil; import org.ohdsi.webapi.util.NameUtils; -import org.ohdsi.webapi.util.ExceptionUtils; import org.ohdsi.webapi.versioning.domain.ConceptSetVersion; import org.ohdsi.webapi.versioning.domain.Version; import org.ohdsi.webapi.versioning.domain.VersionBase; @@ -61,12 +57,47 @@ import org.ohdsi.webapi.versioning.dto.VersionUpdateDTO; import org.ohdsi.webapi.versioning.service.VersionService; import org.ohdsi.webapi.vocabulary.Concept; +import org.springframework.batch.core.Job; +import org.springframework.batch.core.JobParametersBuilder; +import org.springframework.batch.core.Step; +import org.springframework.batch.core.configuration.annotation.JobBuilderFactory; +import org.springframework.batch.core.configuration.annotation.StepBuilderFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.core.convert.support.GenericConversionService; import org.springframework.dao.EmptyResultDataAccessException; import org.springframework.stereotype.Component; - /** +import javax.transaction.Transactional; +import javax.ws.rs.Consumes; +import javax.ws.rs.DELETE; +import javax.ws.rs.DefaultValue; +import javax.ws.rs.GET; +import javax.ws.rs.POST; +import javax.ws.rs.PUT; +import javax.ws.rs.Path; +import javax.ws.rs.PathParam; +import javax.ws.rs.Produces; +import javax.ws.rs.QueryParam; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; +import java.io.ByteArrayOutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; + +import static 
org.ohdsi.webapi.Constants.Params.JOB_NAME; + +/** * Provides REST services for working with * concept sets. * @@ -110,8 +141,22 @@ public class ConceptSetService extends AbstractDaoService implements HasTags versionService; + @Autowired + private StepBuilderFactory stepBuilderFactory; + + @Autowired + private JobTemplate jobTemplate; + + @Autowired + private JobBuilderFactory jobBuilders; + + @Autowired + private JobService jobService; + public static final String COPY_NAME = "copyName"; + private static final String REINDEX_JOB_NAME = "reindexJob_%sourceKey%"; + /** * Get the concept set based in the identifier * @@ -863,7 +908,7 @@ public List listByTags(TagNameListRequestDTO requestDTO) { @Path("/searchAvailable") @GET public boolean isSearchAvailable() { - return conceptSetSearchService.isSearchAvailable(); + return true;//conceptSetSearchService.isSearchAvailable(); } /** @@ -907,35 +952,20 @@ public Collection executeSearch(@PathParam("sourceKey") String so @Consumes(MediaType.APPLICATION_JSON) public ConceptSetReindexDTO fullIndex(@PathParam("sourceKey") String sourceKey) { if (!conceptSetSearchService.isSearchAvailable()) { - return new ConceptSetReindexDTO("UNAVAILABLE", 0 , 0); + return new ConceptSetReindexDTO(ConceptSetReindexStatus.UNAVAILABLE); + } + String jobName = REINDEX_JOB_NAME.replaceAll("sourceKey", sourceKey); + JobExecutionResource jobExecutionResource = jobService.findJobByName(jobName, jobName); + if (jobExecutionResource == null) { + startReindexJob(sourceKey); + return new ConceptSetReindexDTO(ConceptSetReindexStatus.CREATED); + } else if ("COMPLETED".equals(jobExecutionResource.getStatus())) { + return new ConceptSetReindexDTO(ConceptSetReindexStatus.COMPLETED); + } else if ("FAILED".equals(jobExecutionResource.getStatus())) { + return new ConceptSetReindexDTO(ConceptSetReindexStatus.FAILED); + } else { + return new ConceptSetReindexDTO(ConceptSetReindexStatus.STARTED); } - - final Collection documents = new ArrayList<>(); - - final 
Iterable conceptSets = getConceptSetRepository().findAll(); - - conceptSets.forEach(cs -> { - final ConceptSetExpression csExpression = this.getConceptSetExpression(cs.getId()); - final Collection concepts = vocabService.executeMappedLookup(sourceKey, csExpression); - - documents.addAll(concepts.stream().map(item -> { - final ConceptSetSearchDocument concept = new ConceptSetSearchDocument(); - concept.setConceptSetId(cs.getId()); - concept.setConceptId(item.conceptId); - concept.setConceptName(item.conceptName); - concept.setConceptCode(item.conceptCode); - concept.setDomainName(item.domainId); - return concept; - }).collect(Collectors.toList())); - - log.info("Concept set {} added to reindex", cs.getId()); - }); - - log.info("Full concept sets reindex start"); - conceptSetSearchService.indexConceptSetsFull(documents); - log.info("Full concept sets reindex finish"); - - return new ConceptSetReindexDTO("COMPLETED", 0, 0); } private void checkVersion(int id, int version) { @@ -962,4 +992,24 @@ private ConceptSetVersion saveVersion(int id) { version.setCreatedDate(versionDate); return versionService.create(VersionType.CONCEPT_SET, version); } + + private JobExecutionResource startReindexJob(String sourceKey) { + String jobName = REINDEX_JOB_NAME.replaceAll("sourceKey", sourceKey); + JobParametersBuilder parametersBuilder = new JobParametersBuilder(); + parametersBuilder.addString(JOB_NAME, jobName); + parametersBuilder.addString(Constants.Params.SOURCE_KEY, sourceKey); + + Job reindexJob = jobBuilders.get(jobName) + .start(getConceptSetReindexStep(jobName)) + .build(); + + return jobTemplate.launch(reindexJob, parametersBuilder.toJobParameters()); + } + + private Step getConceptSetReindexStep(String stepName) { + ConceptSetReindexTasklet tasklet = new ConceptSetReindexTasklet(conceptSetSearchService, this, vocabService); + return stepBuilderFactory.get(stepName) + .tasklet(tasklet) + .build(); + } } diff --git 
a/src/main/java/org/ohdsi/webapi/service/dto/ConceptSetReindexDTO.java b/src/main/java/org/ohdsi/webapi/service/dto/ConceptSetReindexDTO.java index e800efecbb..6cc639a7a5 100644 --- a/src/main/java/org/ohdsi/webapi/service/dto/ConceptSetReindexDTO.java +++ b/src/main/java/org/ohdsi/webapi/service/dto/ConceptSetReindexDTO.java @@ -1,40 +1,19 @@ package org.ohdsi.webapi.service.dto; -public class ConceptSetReindexDTO { - private String status; - private int maxCount; - private int doneCount; +import org.ohdsi.webapi.conceptset.search.ConceptSetReindexStatus; - public ConceptSetReindexDTO() { - } +public class ConceptSetReindexDTO { + private ConceptSetReindexStatus status; - public ConceptSetReindexDTO(final String status, final int maxCount, final int doneCount) { + public ConceptSetReindexDTO(final ConceptSetReindexStatus status) { this.status = status; - this.maxCount = maxCount; - this.doneCount = doneCount; } - public String getStatus() { + public ConceptSetReindexStatus getStatus() { return status; } - public void setStatus(final String status) { + public void setStatus(final ConceptSetReindexStatus status) { this.status = status; } - - public int getMaxCount() { - return maxCount; - } - - public void setMaxCount(final int maxCount) { - this.maxCount = maxCount; - } - - public int getDoneCount() { - return doneCount; - } - - public void setDoneCount(final int doneCount) { - this.doneCount = doneCount; - } } From 7154d78f49ee26b3252be860f3571e54583cec5c Mon Sep 17 00:00:00 2001 From: ssuvorov-fls Date: Wed, 30 Nov 2022 14:32:29 +0300 Subject: [PATCH 13/27] reindex was moved to spring job --- .../conceptset/ConceptSetRepository.java | 2 + .../search/ConceptSetReindexJobService.java | 250 ++++++++++++++++++ .../search/ConceptSetReindexStatus.java | 2 +- .../search/ConceptSetReindexTasklet.java | 86 ------ .../search/ConceptSetSearchService.java | 9 + .../webapi/service/ConceptSetService.java | 80 ++---- .../service/dto/ConceptSetReindexDTO.java | 32 +++ 
src/main/resources/i18n/messages_en.json | 6 +- src/main/resources/i18n/messages_ko.json | 6 +- src/main/resources/i18n/messages_ru.json | 6 +- src/main/resources/i18n/messages_zh.json | 6 +- 11 files changed, 330 insertions(+), 155 deletions(-) create mode 100644 src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java delete mode 100644 src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexTasklet.java diff --git a/src/main/java/org/ohdsi/webapi/conceptset/ConceptSetRepository.java b/src/main/java/org/ohdsi/webapi/conceptset/ConceptSetRepository.java index 6a66d3c919..966b1357d4 100644 --- a/src/main/java/org/ohdsi/webapi/conceptset/ConceptSetRepository.java +++ b/src/main/java/org/ohdsi/webapi/conceptset/ConceptSetRepository.java @@ -48,4 +48,6 @@ public interface ConceptSetRepository extends CrudRepository findByTags(@Param("tagNames") List tagNames); + + List findAll(); } diff --git a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java new file mode 100644 index 0000000000..c50a4fc9c2 --- /dev/null +++ b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java @@ -0,0 +1,250 @@ +package org.ohdsi.webapi.conceptset.search; + +import org.ohdsi.circe.vocabulary.ConceptSetExpression; +import org.ohdsi.webapi.Constants; +import org.ohdsi.webapi.conceptset.ConceptSet; +import org.ohdsi.webapi.job.JobExecutionResource; +import org.ohdsi.webapi.job.JobTemplate; +import org.ohdsi.webapi.service.ConceptSetService; +import org.ohdsi.webapi.service.VocabularyService; +import org.ohdsi.webapi.service.dto.ConceptSetReindexDTO; +import org.ohdsi.webapi.vocabulary.Concept; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.batch.core.ChunkListener; +import org.springframework.batch.core.ExitStatus; +import org.springframework.batch.core.Job; +import 
org.springframework.batch.core.JobExecution; +import org.springframework.batch.core.JobParametersBuilder; +import org.springframework.batch.core.Step; +import org.springframework.batch.core.StepExecution; +import org.springframework.batch.core.StepExecutionListener; +import org.springframework.batch.core.configuration.annotation.JobBuilderFactory; +import org.springframework.batch.core.configuration.annotation.StepBuilderFactory; +import org.springframework.batch.core.explore.JobExplorer; +import org.springframework.batch.core.scope.context.ChunkContext; +import org.springframework.batch.item.ItemReader; +import org.springframework.batch.item.ItemWriter; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +import static org.ohdsi.webapi.Constants.Params.JOB_NAME; + +@Service +public class ConceptSetReindexJobService { + protected final Logger log = LoggerFactory.getLogger(getClass()); + + @Autowired + private ConceptSetSearchService conceptSetSearchService; + + @Autowired + private ConceptSetService conceptSetService; + + @Autowired + private VocabularyService vocabService; + + @Autowired + private StepBuilderFactory stepBuilderFactory; + + @Autowired + private JobBuilderFactory jobBuilderFactory; + + @Autowired + private JobTemplate jobTemplate; + + @Autowired + private JobExplorer jobExplorer; + + private static final String REINDEX_JOB_NAME = "concept sets reindex: %s"; + + private static final String REINDEX_TOTAL_DOCUMENTS = "REINDEX_TOTAL_DOCUMENTS"; + + private static final String REINDEX_PROCESSED_DOCUMENTS = "REINDEX_PROCESSED_DOCUMENTS"; + + public ConceptSetReindexDTO createIndex(String sourceKey) { + if (!conceptSetSearchService.isSearchAvailable()) { + return new ConceptSetReindexDTO(ConceptSetReindexStatus.UNAVAILABLE); + } + String jobName = 
String.format(REINDEX_JOB_NAME, sourceKey); + Optional jobExecutionOptional = jobExplorer.findRunningJobExecutions(jobName).stream().findFirst(); + if (jobExecutionOptional.isPresent()) { + return new ConceptSetReindexDTO(ConceptSetReindexStatus.RUNNING, jobExecutionOptional.get().getId()); + } else { + JobExecutionResource newJobExecutionResource = createReindexJob(sourceKey); + return new ConceptSetReindexDTO(ConceptSetReindexStatus.CREATED, newJobExecutionResource.getExecutionId()); + } + } + + public ConceptSetReindexDTO getIndexStatus(String sourceKey, Long executionId) { + if (!conceptSetSearchService.isSearchAvailable()) { + return new ConceptSetReindexDTO(ConceptSetReindexStatus.UNAVAILABLE); + } + String jobName = String.format(REINDEX_JOB_NAME, sourceKey); + Optional jobExecutionOptional = jobExplorer.findRunningJobExecutions(jobName).stream().findFirst(); + JobExecution jobExecution = jobExecutionOptional.orElseGet(() -> jobExplorer.getJobExecution(executionId)); + + if (jobExecution != null) { + ConceptSetReindexDTO conceptSetReindexDTO; + if ("COMPLETED".equals(jobExecution.getStatus().name())) { + conceptSetReindexDTO = new ConceptSetReindexDTO(ConceptSetReindexStatus.COMPLETED); + } else if ("FAILED".equals(jobExecution.getStatus().name())) { + conceptSetReindexDTO = new ConceptSetReindexDTO(ConceptSetReindexStatus.FAILED); + } else { + conceptSetReindexDTO = new ConceptSetReindexDTO(ConceptSetReindexStatus.RUNNING); + } + conceptSetReindexDTO.setExecutionId(jobExecution.getId()); + fillCounts(conceptSetReindexDTO, jobExecution); + return conceptSetReindexDTO; + } else { + return new ConceptSetReindexDTO(ConceptSetReindexStatus.UNAVAILABLE); + } + } + + private JobExecutionResource createReindexJob(String sourceKey) { + String jobName = String.format(REINDEX_JOB_NAME, sourceKey); + final List conceptSets = conceptSetService.getConceptSetRepository().findAll(); + conceptSetSearchService.clearConceptSetIndex(); + Step step = 
stepBuilderFactory.get(jobName) + .chunk(1) + .reader(new DocumentReader(sourceKey, conceptSets)) + .writer(new DocumentWriter()) + .listener(new JobStepExecutionListener()) + .listener(new JobChunkListener()) + .build(); + + JobParametersBuilder parametersBuilder = new JobParametersBuilder(); + parametersBuilder.addString(JOB_NAME, String.format(REINDEX_JOB_NAME, sourceKey)); + parametersBuilder.addString(Constants.Params.SOURCE_KEY, sourceKey); + parametersBuilder.addString(REINDEX_TOTAL_DOCUMENTS, String.valueOf(conceptSets.size())); + + Job reindexJob = jobBuilderFactory.get(jobName) + .start(step) + .build(); + + return jobTemplate.launch(reindexJob, parametersBuilder.toJobParameters()); + } + + private void fillCounts(ConceptSetReindexDTO conceptSetReindexDTO, JobExecution jobExecution) { + try { + String jobTotalDocuments = jobExecution.getJobParameters().getString(REINDEX_TOTAL_DOCUMENTS); + if (jobTotalDocuments != null) { + conceptSetReindexDTO.setMaxCount(Integer.parseInt(jobTotalDocuments)); + } + Object jobProcessedDocuments = jobExecution.getExecutionContext().get(REINDEX_PROCESSED_DOCUMENTS); + if (jobProcessedDocuments != null) { + conceptSetReindexDTO.setDoneCount((Integer) jobProcessedDocuments); + } else { + // If the job is still running we can get number of processed documents only from step parameters + jobExecution.getStepExecutions().stream() + .filter(se -> se.getStepName().equals(jobExecution.getJobParameters().getString(JOB_NAME))) + .findFirst() + .ifPresent(se -> { + Object stepProcessedDocuments = se.getExecutionContext().get(REINDEX_PROCESSED_DOCUMENTS); + if (stepProcessedDocuments != null) { + conceptSetReindexDTO.setDoneCount((Integer) stepProcessedDocuments); + } + }); + } + } catch (Exception e) { + log.error("Failed to get count parameters for job with execution id = {}, {}", jobExecution.getId(), e); + } + } + + private static class ConceptDocuments { + final int conceptSetId; + + final List documents; + + private 
ConceptDocuments(int conceptSetId, List documents) { + this.conceptSetId = conceptSetId; + this.documents = documents; + } + } + + private class DocumentReader implements ItemReader { + private final Iterator iterator; + + private final String sourceKey; + + public DocumentReader(String sourceKey, List conceptSets) { + this.iterator = conceptSets.stream().iterator(); + this.sourceKey = sourceKey; + } + + @Override + public ConceptDocuments read() throws Exception { + if (iterator.hasNext()) { + ConceptSet conceptSet = iterator.next(); + final ConceptSetExpression csExpression = conceptSetService.getConceptSetExpression(conceptSet.getId()); + final Collection concepts = vocabService.executeMappedLookup(sourceKey, csExpression); + + final List documents = concepts.stream().map(item -> { + final ConceptSetSearchDocument concept = new ConceptSetSearchDocument(); + concept.setConceptSetId(conceptSet.getId()); + concept.setConceptId(item.conceptId); + concept.setConceptName(item.conceptName); + concept.setConceptCode(item.conceptCode); + concept.setDomainName(item.domainId); + return concept; + }).collect(Collectors.toList()); + return new ConceptDocuments(conceptSet.getId(), documents); + } else { + return null; + } + } + } + + private class DocumentWriter implements ItemWriter { + @Override + public void write(List list) throws Exception { + list.forEach(cd -> conceptSetSearchService.reindexConceptSet(cd.conceptSetId, cd.documents)); + } + } + + public class JobStepExecutionListener implements StepExecutionListener { + @Override + public void beforeStep(StepExecution stepExecution) { + } + + @Override + public ExitStatus afterStep(StepExecution stepExecution) { + Object processedCount = stepExecution.getExecutionContext().get(REINDEX_PROCESSED_DOCUMENTS); + if (processedCount != null) { + if ((Integer) processedCount != 0) { + // Subtract 1 if the value is not equal to zero because "beforeChunk" method is called + // even if there's no element to process, so we get 
total number of processed documents plus one + stepExecution.getJobExecution().getExecutionContext() + .put(REINDEX_PROCESSED_DOCUMENTS, ((Integer) processedCount) - 1); + } + } + return stepExecution.getExitStatus(); + } + } + + public class JobChunkListener implements ChunkListener { + private int counter = 0; + + @Override + public void beforeChunk(ChunkContext context) { + // Increment the number of processed documents before chunk because saving of step execution parameters + // is made before "afterChunk" is called + context.getStepContext().getStepExecution().getExecutionContext() + .put(REINDEX_PROCESSED_DOCUMENTS, ++counter); + } + + @Override + public void afterChunk(ChunkContext context) { + // This method is called after saving of step parameters, so we can't use it + } + + @Override + public void afterChunkError(ChunkContext context) { + } + } +} \ No newline at end of file diff --git a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexStatus.java b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexStatus.java index ef58e973d0..6ab1c79a29 100644 --- a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexStatus.java +++ b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexStatus.java @@ -1,5 +1,5 @@ package org.ohdsi.webapi.conceptset.search; public enum ConceptSetReindexStatus { - UNAVAILABLE, CREATED, STARTED, COMPLETED, FAILED + UNAVAILABLE, CREATED, RUNNING, COMPLETED, FAILED } \ No newline at end of file diff --git a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexTasklet.java b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexTasklet.java deleted file mode 100644 index c0a34d6f40..0000000000 --- a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexTasklet.java +++ /dev/null @@ -1,86 +0,0 @@ -package org.ohdsi.webapi.conceptset.search; - -import org.ohdsi.circe.vocabulary.ConceptSetExpression; -import org.ohdsi.webapi.Constants; -import 
org.ohdsi.webapi.conceptset.ConceptSet; -import org.ohdsi.webapi.exception.AtlasException; -import org.ohdsi.webapi.service.ConceptSetService; -import org.ohdsi.webapi.service.VocabularyService; -import org.ohdsi.webapi.vocabulary.Concept; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.batch.core.ExitStatus; -import org.springframework.batch.core.StepContribution; -import org.springframework.batch.core.scope.context.ChunkContext; -import org.springframework.batch.core.step.tasklet.Tasklet; -import org.springframework.batch.repeat.RepeatStatus; -import org.springframework.stereotype.Service; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Map; -import java.util.stream.Collectors; - -import static org.ohdsi.webapi.Constants.Params.SOURCE_KEY; - -@Service -public class ConceptSetReindexTasklet implements Tasklet { - protected final Logger log = LoggerFactory.getLogger(getClass()); - - private ConceptSetSearchService conceptSetSearchService; - - private ConceptSetService conceptSetService; - - private VocabularyService vocabService; - - public ConceptSetReindexTasklet(ConceptSetSearchService conceptSetSearchService, - ConceptSetService conceptSetService, - VocabularyService vocabService) { - this.conceptSetSearchService = conceptSetSearchService; - this.conceptSetService = conceptSetService; - this.vocabService = vocabService; - } - - @Override - public RepeatStatus execute(StepContribution stepContribution, ChunkContext chunkContext) throws Exception { - try { - fullIndex(chunkContext); - stepContribution.setExitStatus(ExitStatus.COMPLETED); - } catch (final Throwable ex) { - log.error(ex.getMessage(), ex); - stepContribution.setExitStatus(new ExitStatus(Constants.FAILED, ex.getMessage())); - throw new AtlasException(ex); - } - return RepeatStatus.FINISHED; - } - - private void fullIndex(ChunkContext chunkContext) { - Map jobParams = chunkContext.getStepContext().getJobParameters(); - String 
sourceKey = jobParams.get(SOURCE_KEY).toString(); - - final Collection documents = new ArrayList<>(); - - final Iterable conceptSets = conceptSetService.getConceptSetRepository().findAll(); - - conceptSets.forEach(cs -> { - final ConceptSetExpression csExpression = conceptSetService.getConceptSetExpression(cs.getId()); - final Collection concepts = vocabService.executeMappedLookup(sourceKey, csExpression); - - documents.addAll(concepts.stream().map(item -> { - final ConceptSetSearchDocument concept = new ConceptSetSearchDocument(); - concept.setConceptSetId(cs.getId()); - concept.setConceptId(item.conceptId); - concept.setConceptName(item.conceptName); - concept.setConceptCode(item.conceptCode); - concept.setDomainName(item.domainId); - return concept; - }).collect(Collectors.toList())); - - log.info("Concept set {} added to reindex", cs.getId()); - }); - - log.info("Full concept sets reindex start"); - conceptSetSearchService.indexConceptSetsFull(documents); - log.info("Full concept sets reindex finish"); - } -} \ No newline at end of file diff --git a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java index 4f9c67720f..4626590def 100644 --- a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java +++ b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java @@ -75,6 +75,15 @@ public void indexConceptSetsFull(final Collection docu } } + public void clearConceptSetIndex() { + try { + final SolrClient solrClient = solrSearchClient.getSolrClient(conceptSetsCore); + solrClient.deleteByQuery("*:*"); + } catch (final Exception e) { + log.error("SOLR error: concept set removing failed with message: {}", e.getMessage()); + } + } + public void reindexConceptSet(final Integer conceptSetId, final Collection documents) { try { final SolrClient solrClient = solrSearchClient.getSolrClient(conceptSetsCore); diff --git 
a/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java b/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java index c197995d0c..652ac56273 100644 --- a/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java +++ b/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java @@ -17,7 +17,6 @@ import org.apache.shiro.authz.UnauthorizedException; import org.ohdsi.circe.vocabulary.ConceptSetExpression; -import org.ohdsi.webapi.Constants; import org.ohdsi.webapi.check.CheckResult; import org.ohdsi.webapi.check.checker.conceptset.ConceptSetChecker; import org.ohdsi.webapi.conceptset.ConceptSet; @@ -26,13 +25,10 @@ import org.ohdsi.webapi.conceptset.ConceptSetGenerationInfoRepository; import org.ohdsi.webapi.conceptset.ConceptSetItem; import org.ohdsi.webapi.conceptset.dto.ConceptSetVersionFullDTO; -import org.ohdsi.webapi.conceptset.search.ConceptSetReindexStatus; -import org.ohdsi.webapi.conceptset.search.ConceptSetReindexTasklet; +import org.ohdsi.webapi.conceptset.search.ConceptSetReindexJobService; import org.ohdsi.webapi.conceptset.search.ConceptSetSearchDocument; import org.ohdsi.webapi.conceptset.search.ConceptSetSearchService; import org.ohdsi.webapi.exception.ConceptNotExistException; -import org.ohdsi.webapi.job.JobExecutionResource; -import org.ohdsi.webapi.job.JobTemplate; import org.ohdsi.webapi.security.PermissionService; import org.ohdsi.webapi.service.dto.ConceptSetDTO; import org.ohdsi.webapi.service.dto.ConceptSetReindexDTO; @@ -57,11 +53,6 @@ import org.ohdsi.webapi.versioning.dto.VersionUpdateDTO; import org.ohdsi.webapi.versioning.service.VersionService; import org.ohdsi.webapi.vocabulary.Concept; -import org.springframework.batch.core.Job; -import org.springframework.batch.core.JobParametersBuilder; -import org.springframework.batch.core.Step; -import org.springframework.batch.core.configuration.annotation.JobBuilderFactory; -import org.springframework.batch.core.configuration.annotation.StepBuilderFactory; import 
org.springframework.beans.factory.annotation.Autowired; import org.springframework.core.convert.support.GenericConversionService; import org.springframework.dao.EmptyResultDataAccessException; @@ -95,8 +86,6 @@ import java.util.stream.Collectors; import java.util.stream.StreamSupport; -import static org.ohdsi.webapi.Constants.Params.JOB_NAME; - /** * Provides REST services for working with * concept sets. @@ -142,21 +131,10 @@ public class ConceptSetService extends AbstractDaoService implements HasTags versionService; @Autowired - private StepBuilderFactory stepBuilderFactory; - - @Autowired - private JobTemplate jobTemplate; - - @Autowired - private JobBuilderFactory jobBuilders; - - @Autowired - private JobService jobService; + private ConceptSetReindexJobService conceptSetReindexJobService; public static final String COPY_NAME = "copyName"; - private static final String REINDEX_JOB_NAME = "reindexJob_%sourceKey%"; - /** * Get the concept set based in the identifier * @@ -908,7 +886,7 @@ public List listByTags(TagNameListRequestDTO requestDTO) { @Path("/searchAvailable") @GET public boolean isSearchAvailable() { - return true;//conceptSetSearchService.isSearchAvailable(); + return conceptSetSearchService.isSearchAvailable(); } /** @@ -951,21 +929,23 @@ public Collection executeSearch(@PathParam("sourceKey") String so @Produces(MediaType.APPLICATION_JSON) @Consumes(MediaType.APPLICATION_JSON) public ConceptSetReindexDTO fullIndex(@PathParam("sourceKey") String sourceKey) { - if (!conceptSetSearchService.isSearchAvailable()) { - return new ConceptSetReindexDTO(ConceptSetReindexStatus.UNAVAILABLE); - } - String jobName = REINDEX_JOB_NAME.replaceAll("sourceKey", sourceKey); - JobExecutionResource jobExecutionResource = jobService.findJobByName(jobName, jobName); - if (jobExecutionResource == null) { - startReindexJob(sourceKey); - return new ConceptSetReindexDTO(ConceptSetReindexStatus.CREATED); - } else if ("COMPLETED".equals(jobExecutionResource.getStatus())) { - 
return new ConceptSetReindexDTO(ConceptSetReindexStatus.COMPLETED); - } else if ("FAILED".equals(jobExecutionResource.getStatus())) { - return new ConceptSetReindexDTO(ConceptSetReindexStatus.FAILED); - } else { - return new ConceptSetReindexDTO(ConceptSetReindexStatus.STARTED); - } + return conceptSetReindexJobService.createIndex(sourceKey); + } + + /** + * Get status of reindexing of concept sets for search. + * + * @summary Get status of reindexing of concept sets for search. + * @param sourceKey The source key + * @param executionId The identifier of execution. In case of null value service will search active job + */ + @Path("{sourceKey}/index/{executionId}/status") + @GET + @Produces(MediaType.APPLICATION_JSON) + @Consumes(MediaType.APPLICATION_JSON) + public ConceptSetReindexDTO fullIndexStatus(@PathParam("sourceKey") String sourceKey, + @PathParam("executionId") Long executionId) { + return conceptSetReindexJobService.getIndexStatus(sourceKey, executionId); } private void checkVersion(int id, int version) { @@ -992,24 +972,4 @@ private ConceptSetVersion saveVersion(int id) { version.setCreatedDate(versionDate); return versionService.create(VersionType.CONCEPT_SET, version); } - - private JobExecutionResource startReindexJob(String sourceKey) { - String jobName = REINDEX_JOB_NAME.replaceAll("sourceKey", sourceKey); - JobParametersBuilder parametersBuilder = new JobParametersBuilder(); - parametersBuilder.addString(JOB_NAME, jobName); - parametersBuilder.addString(Constants.Params.SOURCE_KEY, sourceKey); - - Job reindexJob = jobBuilders.get(jobName) - .start(getConceptSetReindexStep(jobName)) - .build(); - - return jobTemplate.launch(reindexJob, parametersBuilder.toJobParameters()); - } - - private Step getConceptSetReindexStep(String stepName) { - ConceptSetReindexTasklet tasklet = new ConceptSetReindexTasklet(conceptSetSearchService, this, vocabService); - return stepBuilderFactory.get(stepName) - .tasklet(tasklet) - .build(); - } } diff --git 
a/src/main/java/org/ohdsi/webapi/service/dto/ConceptSetReindexDTO.java b/src/main/java/org/ohdsi/webapi/service/dto/ConceptSetReindexDTO.java index 6cc639a7a5..8cf887f434 100644 --- a/src/main/java/org/ohdsi/webapi/service/dto/ConceptSetReindexDTO.java +++ b/src/main/java/org/ohdsi/webapi/service/dto/ConceptSetReindexDTO.java @@ -4,11 +4,19 @@ public class ConceptSetReindexDTO { private ConceptSetReindexStatus status; + private int maxCount; + private int doneCount; + private long executionId; public ConceptSetReindexDTO(final ConceptSetReindexStatus status) { this.status = status; } + public ConceptSetReindexDTO(final ConceptSetReindexStatus status, final long executionId) { + this.status = status; + this.executionId = executionId; + } + public ConceptSetReindexStatus getStatus() { return status; } @@ -16,4 +24,28 @@ public ConceptSetReindexStatus getStatus() { public void setStatus(final ConceptSetReindexStatus status) { this.status = status; } + + public int getMaxCount() { + return maxCount; + } + + public void setMaxCount(int maxCount) { + this.maxCount = maxCount; + } + + public int getDoneCount() { + return doneCount; + } + + public void setDoneCount(int doneCount) { + this.doneCount = doneCount; + } + + public long getExecutionId() { + return executionId; + } + + public void setExecutionId(long executionId) { + this.executionId = executionId; + } } diff --git a/src/main/resources/i18n/messages_en.json b/src/main/resources/i18n/messages_en.json index 4ffdd8d900..1d63d36977 100644 --- a/src/main/resources/i18n/messages_en.json +++ b/src/main/resources/i18n/messages_en.json @@ -2446,7 +2446,8 @@ "clearLocalCache": "Local Storage has been cleared. 
Please refresh the page to reload configuration information.", "clearServerCache": "Server cache has been cleared.", "failUpdatePrioritySourceDaimon": "Failed to update priority source daimon", - "changeSource": "You are changing current source, we recommend you to reindex concept sets" + "changeSource": "You are changing current source, we recommend you to reindex concept sets", + "reindexRunning": "Reindexing of concept sets is currently in progress" }, "buttons": { "check": "check", @@ -2455,7 +2456,8 @@ "clearServerCache": "Clear Server Cache", "importUsers": "Import Users from LDAP/AD", "managePermissions": "Manage Permissions", - "reindexCS": "Concept Sets Reindex" + "reindexCS": "Concept Sets Reindex", + "reindexCSStatus": "Concept Sets Reindex (<%=doneCount%> of <%=maxCount%>)" }, "changeSourcePriorities": "Change source priorities in:", "confirms": { diff --git a/src/main/resources/i18n/messages_ko.json b/src/main/resources/i18n/messages_ko.json index e337feda26..f337749a0c 100644 --- a/src/main/resources/i18n/messages_ko.json +++ b/src/main/resources/i18n/messages_ko.json @@ -2446,7 +2446,8 @@ "clearLocalCache": "로컬 저장소가 지워졌습니다. 
구성 정보를 다시 로드하려면 페이지를 새로 고침하세요.", "clearServerCache": "서버 캐시가 지워졌습니다.", "failUpdatePrioritySourceDaimon": "우선순위 소스(priority source) daimon을 업데이트하지 못했습니다.", - "changeSource": "You are changing current source, we recommend you to do reindexing concept sets" + "changeSource": "You are changing current source, we recommend you to do reindexing concept sets", + "reindexRunning": "Reindexing of concept sets is currently in progress" }, "buttons": { "check": "확인", @@ -2455,7 +2456,8 @@ "clearServerCache": "서버 캐시 지우기", "importUsers": "LDAP/AD에서 사용자 가져오기", "managePermissions": "사용 권한 관리", - "reindexCS": "Concept Sets Reindex" + "reindexCS": "Concept Sets Reindex", + "reindexCSStatus": "Concept Sets Reindex (<%=doneCount%> of <%=maxCount%>)" }, "changeSourcePriorities": "다음에서 소스 우선 순위 변경:", "confirms": { diff --git a/src/main/resources/i18n/messages_ru.json b/src/main/resources/i18n/messages_ru.json index 5af6cfc359..8240b70011 100644 --- a/src/main/resources/i18n/messages_ru.json +++ b/src/main/resources/i18n/messages_ru.json @@ -2453,7 +2453,8 @@ "managePermissions": "Управление разрешениями", "importUsers": "Импорт пользователей из LDAP/AD", "clearServerCache": "Очистить серверный кэш", - "reindexCS": "Переиндексация наборов концептов" + "reindexCS": "Переиндексация наборов концептов", + "reindexCSStatus": "Переиндексация наборов концептов (<%=doneCount%> из <%=maxCount%>)" }, "priorityOptions": { "session": "В текущей сессии", @@ -2463,7 +2464,8 @@ "clearLocalCache": "Локальное хранилище очищено. Пожалуйста, перезагрузите страницу, чтобы обновитьинформацию о конфигурации.", "clearServerCache": "Серверный кэш очищен", "failUpdatePrioritySourceDaimon": "Не удалось обновить демона приоритета источника", - "changeSource": "Вы меняете текущий источник. Мы рекомендуем вам сделать реиндексацию наборов концептов" + "changeSource": "Вы меняете текущий источник. 
Мы рекомендуем вам сделать реиндексацию наборов концептов", + "reindexRunning": "Реиндексация концепт-сетов в процессе" }, "confirms": { "clearServerCache": "Вы действительно хотите очистить серверный кэш?", diff --git a/src/main/resources/i18n/messages_zh.json b/src/main/resources/i18n/messages_zh.json index 8f513f4f82..b01eeb1d4b 100644 --- a/src/main/resources/i18n/messages_zh.json +++ b/src/main/resources/i18n/messages_zh.json @@ -2446,7 +2446,8 @@ "clearLocalCache": "本地存储已清除。 请刷新页面以重新加载配置信息。", "clearServerCache": "服务器缓存已清除。", "failUpdatePrioritySourceDaimon": "无法更新优先级源守护程序", - "changeSource": "You are changing current source, we recommend you to do reindexing concept sets" + "changeSource": "You are changing current source, we recommend you to do reindexing concept sets", + "reindexRunning": "Reindexing of concept sets is currently in progress" }, "buttons": { "check": "检查", @@ -2455,7 +2456,8 @@ "clearServerCache": "清除服务器缓存", "importUsers": "从LDAP / AD导入用户", "managePermissions": "管理权限", - "reindexCS": "Concept Sets Reindex" + "reindexCS": "Concept Sets Reindex", + "reindexCSStatus": "Concept Sets Reindex (<%=doneCount%> of <%=maxCount%>)" }, "changeSourcePriorities": "更改源优先级:", "confirms": { From 6de0796feaf3dc31070d3801adbbd3c5008a4707 Mon Sep 17 00:00:00 2001 From: ssuvorov-fls Date: Wed, 30 Nov 2022 14:58:42 +0300 Subject: [PATCH 14/27] added permissions for reindex status endpoint --- ...13.0.20221031170000__add_concept_set_search_permission.sql | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/resources/db/migration/postgresql/V2.13.0.20221031170000__add_concept_set_search_permission.sql b/src/main/resources/db/migration/postgresql/V2.13.0.20221031170000__add_concept_set_search_permission.sql index 33b0acedc3..f84e6b8f39 100644 --- a/src/main/resources/db/migration/postgresql/V2.13.0.20221031170000__add_concept_set_search_permission.sql +++ 
b/src/main/resources/db/migration/postgresql/V2.13.0.20221031170000__add_concept_set_search_permission.sql @@ -4,6 +4,8 @@ INSERT INTO ${ohdsiSchema}.sec_permission(id, value, description) VALUES (nextval('${ohdsiSchema}.sec_permission_id_seq'), 'conceptset:*:search', 'Concept sets search permission'); INSERT INTO ${ohdsiSchema}.sec_permission(id, value, description) VALUES (nextval('${ohdsiSchema}.sec_permission_id_seq'), 'conceptset:*:index', 'Concept sets search index permission'); +INSERT INTO ${ohdsiSchema}.sec_permission(id, value, description) VALUES + (nextval('${ohdsiSchema}.sec_permission_id_seq'), 'conceptset:*:index:*:status', 'Concept sets search index status permission'); INSERT INTO ${ohdsiSchema}.sec_role_permission(id, role_id, permission_id) SELECT nextval('${ohdsiSchema}.sec_role_permission_sequence'), sr.id, sp.id @@ -13,4 +15,4 @@ WHERE sp.value in ('conceptset:searchAvailable', 'conceptset:*:search') AND sr.n INSERT INTO ${ohdsiSchema}.sec_role_permission(id, role_id, permission_id) SELECT nextval('${ohdsiSchema}.sec_role_permission_sequence'), sr.id, sp.id FROM ${ohdsiSchema}.sec_permission SP, ${ohdsiSchema}.sec_role sr -WHERE sp.value = 'conceptset:*:index' AND sr.name IN ('admin'); \ No newline at end of file +WHERE sp.value in ('conceptset:*:index', 'conceptset:*:index:*:status') AND sr.name IN ('admin'); \ No newline at end of file From e209b33c498234ccb786249a5948e99034b74ff4 Mon Sep 17 00:00:00 2001 From: Anton Abushkevich Date: Thu, 1 Dec 2022 09:14:11 +0300 Subject: [PATCH 15/27] Concept sets search - catch and ignore ConceptNotExistException on CS reindex --- .../search/ConceptSetReindexJobService.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java index c50a4fc9c2..cab7f7602e 100644 --- 
a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java +++ b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java @@ -3,6 +3,7 @@ import org.ohdsi.circe.vocabulary.ConceptSetExpression; import org.ohdsi.webapi.Constants; import org.ohdsi.webapi.conceptset.ConceptSet; +import org.ohdsi.webapi.exception.ConceptNotExistException; import org.ohdsi.webapi.job.JobExecutionResource; import org.ohdsi.webapi.job.JobTemplate; import org.ohdsi.webapi.service.ConceptSetService; @@ -181,7 +182,15 @@ public DocumentReader(String sourceKey, List conceptSets) { public ConceptDocuments read() throws Exception { if (iterator.hasNext()) { ConceptSet conceptSet = iterator.next(); - final ConceptSetExpression csExpression = conceptSetService.getConceptSetExpression(conceptSet.getId()); + final ConceptSetExpression csExpression; + + try { + csExpression = conceptSetService.getConceptSetExpression(conceptSet.getId()); + } catch (final ConceptNotExistException e) { + // data source does not contain required concepts, skip CS + return null; + } + final Collection concepts = vocabService.executeMappedLookup(sourceKey, csExpression); final List documents = concepts.stream().map(item -> { From a0f8b642e273559965e08fb2f904dd94ad17dca0 Mon Sep 17 00:00:00 2001 From: ssuvorov-fls Date: Thu, 1 Dec 2022 10:13:05 +0300 Subject: [PATCH 16/27] return empty list of documents when source does not contain required concepts --- .../webapi/conceptset/search/ConceptSetReindexJobService.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java index cab7f7602e..9b2d662756 100644 --- a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java +++ b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java @@ -30,6 +30,7 @@ 
import org.springframework.stereotype.Service; import java.util.Collection; +import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Optional; @@ -188,7 +189,7 @@ public ConceptDocuments read() throws Exception { csExpression = conceptSetService.getConceptSetExpression(conceptSet.getId()); } catch (final ConceptNotExistException e) { // data source does not contain required concepts, skip CS - return null; + return new ConceptDocuments(conceptSet.getId(), Collections.emptyList()); } final Collection concepts = vocabService.executeMappedLookup(sourceKey, csExpression); From 70e149bcf06c4d68b7922a0fe767e90902f05de6 Mon Sep 17 00:00:00 2001 From: ssuvorov-fls Date: Thu, 1 Dec 2022 10:36:48 +0300 Subject: [PATCH 17/27] return empty list of documents when source does not contain required concepts --- .../search/ConceptSetReindexJobService.java | 70 ++++++++++++------- 1 file changed, 43 insertions(+), 27 deletions(-) diff --git a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java index 9b2d662756..6c6bc1b4b2 100644 --- a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java +++ b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java @@ -159,14 +159,23 @@ private void fillCounts(ConceptSetReindexDTO conceptSetReindexDTO, JobExecution } private static class ConceptDocuments { - final int conceptSetId; + final Integer conceptSetId; final List documents; - private ConceptDocuments(int conceptSetId, List documents) { + private ConceptDocuments() { + this.conceptSetId = null; + this.documents = Collections.emptyList(); + } + + private ConceptDocuments(Integer conceptSetId, List documents) { this.conceptSetId = conceptSetId; this.documents = documents; } + + public boolean hasDataToProcess() { + return conceptSetId != null && documents != null && !documents.isEmpty(); 
+ } } private class DocumentReader implements ItemReader { @@ -181,31 +190,36 @@ public DocumentReader(String sourceKey, List conceptSets) { @Override public ConceptDocuments read() throws Exception { - if (iterator.hasNext()) { - ConceptSet conceptSet = iterator.next(); - final ConceptSetExpression csExpression; - - try { - csExpression = conceptSetService.getConceptSetExpression(conceptSet.getId()); - } catch (final ConceptNotExistException e) { - // data source does not contain required concepts, skip CS - return new ConceptDocuments(conceptSet.getId(), Collections.emptyList()); + try { + if (iterator.hasNext()) { + ConceptSet conceptSet = iterator.next(); + final ConceptSetExpression csExpression; + + try { + csExpression = conceptSetService.getConceptSetExpression(conceptSet.getId()); + } catch (final ConceptNotExistException e) { + // data source does not contain required concepts, skip CS + return new ConceptDocuments(); + } + + final Collection concepts = vocabService.executeMappedLookup(sourceKey, csExpression); + + final List documents = concepts.stream().map(item -> { + final ConceptSetSearchDocument concept = new ConceptSetSearchDocument(); + concept.setConceptSetId(conceptSet.getId()); + concept.setConceptId(item.conceptId); + concept.setConceptName(item.conceptName); + concept.setConceptCode(item.conceptCode); + concept.setDomainName(item.domainId); + return concept; + }).collect(Collectors.toList()); + return new ConceptDocuments(conceptSet.getId(), documents); + } else { + return null; } - - final Collection concepts = vocabService.executeMappedLookup(sourceKey, csExpression); - - final List documents = concepts.stream().map(item -> { - final ConceptSetSearchDocument concept = new ConceptSetSearchDocument(); - concept.setConceptSetId(conceptSet.getId()); - concept.setConceptId(item.conceptId); - concept.setConceptName(item.conceptName); - concept.setConceptCode(item.conceptCode); - concept.setDomainName(item.domainId); - return concept; - 
}).collect(Collectors.toList()); - return new ConceptDocuments(conceptSet.getId(), documents); - } else { - return null; + } catch (Exception e) { + log.error("Failed to get data for processing, {}", e); + return new ConceptDocuments(); } } } @@ -213,7 +227,9 @@ public ConceptDocuments read() throws Exception { private class DocumentWriter implements ItemWriter { @Override public void write(List list) throws Exception { - list.forEach(cd -> conceptSetSearchService.reindexConceptSet(cd.conceptSetId, cd.documents)); + list.stream() + .filter(ConceptDocuments::hasDataToProcess) + .forEach(cd -> conceptSetSearchService.reindexConceptSet(cd.conceptSetId, cd.documents)); } } From eaf8f252d2a0d8f03f436a0d826028719ded4c4d Mon Sep 17 00:00:00 2001 From: ssuvorov-fls Date: Thu, 1 Dec 2022 10:38:18 +0300 Subject: [PATCH 18/27] return empty list of documents when source does not contain required concepts --- .../webapi/conceptset/search/ConceptSetReindexJobService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java index 6c6bc1b4b2..03392e438d 100644 --- a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java +++ b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java @@ -218,7 +218,7 @@ public ConceptDocuments read() throws Exception { return null; } } catch (Exception e) { - log.error("Failed to get data for processing, {}", e); + log.error("Failed to get data for processing", e); return new ConceptDocuments(); } } From e56345302f44e9c5d75159c02599dc85b69fc0f3 Mon Sep 17 00:00:00 2001 From: ssuvorov-fls Date: Thu, 1 Dec 2022 11:00:27 +0300 Subject: [PATCH 19/27] return empty list of documents when source does not contain required concepts --- .../search/ConceptSetReindexJobService.java | 93 +++++++++---------- 1 file changed, 44 
insertions(+), 49 deletions(-) diff --git a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java index 03392e438d..c5b540a227 100644 --- a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java +++ b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java @@ -116,7 +116,7 @@ private JobExecutionResource createReindexJob(String sourceKey) { .chunk(1) .reader(new DocumentReader(sourceKey, conceptSets)) .writer(new DocumentWriter()) - .listener(new JobStepExecutionListener()) + //.listener(new JobStepExecutionListener()) .listener(new JobChunkListener()) .build(); @@ -190,36 +190,31 @@ public DocumentReader(String sourceKey, List conceptSets) { @Override public ConceptDocuments read() throws Exception { - try { - if (iterator.hasNext()) { - ConceptSet conceptSet = iterator.next(); - final ConceptSetExpression csExpression; - - try { - csExpression = conceptSetService.getConceptSetExpression(conceptSet.getId()); - } catch (final ConceptNotExistException e) { - // data source does not contain required concepts, skip CS - return new ConceptDocuments(); - } - - final Collection concepts = vocabService.executeMappedLookup(sourceKey, csExpression); - - final List documents = concepts.stream().map(item -> { - final ConceptSetSearchDocument concept = new ConceptSetSearchDocument(); - concept.setConceptSetId(conceptSet.getId()); - concept.setConceptId(item.conceptId); - concept.setConceptName(item.conceptName); - concept.setConceptCode(item.conceptCode); - concept.setDomainName(item.domainId); - return concept; - }).collect(Collectors.toList()); - return new ConceptDocuments(conceptSet.getId(), documents); - } else { - return null; + if (iterator.hasNext()) { + ConceptSet conceptSet = iterator.next(); + final ConceptSetExpression csExpression; + + try { + csExpression = 
conceptSetService.getConceptSetExpression(conceptSet.getId()); + } catch (final ConceptNotExistException e) { + // data source does not contain required concepts, skip CS + return new ConceptDocuments(); } - } catch (Exception e) { - log.error("Failed to get data for processing", e); - return new ConceptDocuments(); + + final Collection concepts = vocabService.executeMappedLookup(sourceKey, csExpression); + + final List documents = concepts.stream().map(item -> { + final ConceptSetSearchDocument concept = new ConceptSetSearchDocument(); + concept.setConceptSetId(conceptSet.getId()); + concept.setConceptId(item.conceptId); + concept.setConceptName(item.conceptName); + concept.setConceptCode(item.conceptCode); + concept.setDomainName(item.domainId); + return concept; + }).collect(Collectors.toList()); + return new ConceptDocuments(conceptSet.getId(), documents); + } else { + return null; } } } @@ -233,25 +228,25 @@ public void write(List list) throws Exception { } } - public class JobStepExecutionListener implements StepExecutionListener { - @Override - public void beforeStep(StepExecution stepExecution) { - } - - @Override - public ExitStatus afterStep(StepExecution stepExecution) { - Object processedCount = stepExecution.getExecutionContext().get(REINDEX_PROCESSED_DOCUMENTS); - if (processedCount != null) { - if ((Integer) processedCount != 0) { - // Subtract 1 if the value is not equal to zero because "beforeChunk" method is called - // even if there's no element to process, so we get total number of processed documents plus one - stepExecution.getJobExecution().getExecutionContext() - .put(REINDEX_PROCESSED_DOCUMENTS, ((Integer) processedCount) - 1); - } - } - return stepExecution.getExitStatus(); - } - } +// public class JobStepExecutionListener implements StepExecutionListener { +// @Override +// public void beforeStep(StepExecution stepExecution) { +// } +// +// @Override +// public ExitStatus afterStep(StepExecution stepExecution) { +// Object processedCount 
= stepExecution.getExecutionContext().get(REINDEX_PROCESSED_DOCUMENTS); +// if (processedCount != null) { +// if ((Integer) processedCount != 0) { +// // Subtract 1 if the value is not equal to zero because "beforeChunk" method is called +// // even if there's no element to process, so we get total number of processed documents plus one +// stepExecution.getJobExecution().getExecutionContext() +// .put(REINDEX_PROCESSED_DOCUMENTS, ((Integer) processedCount) - 1); +// } +// } +// return stepExecution.getExitStatus(); +// } +// } public class JobChunkListener implements ChunkListener { private int counter = 0; From 246477b1b57fdb33f781670b9a3cb039c9937fff Mon Sep 17 00:00:00 2001 From: ssuvorov-fls Date: Thu, 1 Dec 2022 12:32:47 +0300 Subject: [PATCH 20/27] return empty list of documents when source does not contain required concepts --- .../search/ConceptSetReindexJobService.java | 40 +++++++++---------- .../search/ConceptSetSearchService.java | 1 + 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java index c5b540a227..b9a8ed8234 100644 --- a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java +++ b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java @@ -116,7 +116,7 @@ private JobExecutionResource createReindexJob(String sourceKey) { .chunk(1) .reader(new DocumentReader(sourceKey, conceptSets)) .writer(new DocumentWriter()) - //.listener(new JobStepExecutionListener()) + .listener(new JobStepExecutionListener()) .listener(new JobChunkListener()) .build(); @@ -228,25 +228,25 @@ public void write(List list) throws Exception { } } -// public class JobStepExecutionListener implements StepExecutionListener { -// @Override -// public void beforeStep(StepExecution stepExecution) { -// } -// -// @Override -// public ExitStatus 
afterStep(StepExecution stepExecution) { -// Object processedCount = stepExecution.getExecutionContext().get(REINDEX_PROCESSED_DOCUMENTS); -// if (processedCount != null) { -// if ((Integer) processedCount != 0) { -// // Subtract 1 if the value is not equal to zero because "beforeChunk" method is called -// // even if there's no element to process, so we get total number of processed documents plus one -// stepExecution.getJobExecution().getExecutionContext() -// .put(REINDEX_PROCESSED_DOCUMENTS, ((Integer) processedCount) - 1); -// } -// } -// return stepExecution.getExitStatus(); -// } -// } + public class JobStepExecutionListener implements StepExecutionListener { + @Override + public void beforeStep(StepExecution stepExecution) { + } + + @Override + public ExitStatus afterStep(StepExecution stepExecution) { + Object processedCount = stepExecution.getExecutionContext().get(REINDEX_PROCESSED_DOCUMENTS); + if (processedCount != null) { + if ((Integer) processedCount != 0) { + // Subtract 1 if the value is not equal to zero because "beforeChunk" method is called + // even if there's no element to process, so we get total number of processed documents plus one + stepExecution.getJobExecution().getExecutionContext() + .put(REINDEX_PROCESSED_DOCUMENTS, ((Integer) processedCount) - 1); + } + } + return stepExecution.getExitStatus(); + } + } public class JobChunkListener implements ChunkListener { private int counter = 0; diff --git a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java index 4626590def..477476b4bd 100644 --- a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java +++ b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java @@ -79,6 +79,7 @@ public void clearConceptSetIndex() { try { final SolrClient solrClient = solrSearchClient.getSolrClient(conceptSetsCore); solrClient.deleteByQuery("*:*"); + 
solrClient.commit(); } catch (final Exception e) { log.error("SOLR error: concept set removing failed with message: {}", e.getMessage()); } From 8e94727672809c4387f3b749d4789e21fb6deabf Mon Sep 17 00:00:00 2001 From: ssuvorov-fls Date: Thu, 1 Dec 2022 14:53:01 +0300 Subject: [PATCH 21/27] return empty list of documents when source does not contain required concepts --- .../conceptset/search/ConceptSetReindexJobService.java | 8 ++------ .../org/ohdsi/webapi/service/ConceptSetService.java | 10 ++++++++++ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java index b9a8ed8234..84cbe020c8 100644 --- a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java +++ b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetReindexJobService.java @@ -3,7 +3,6 @@ import org.ohdsi.circe.vocabulary.ConceptSetExpression; import org.ohdsi.webapi.Constants; import org.ohdsi.webapi.conceptset.ConceptSet; -import org.ohdsi.webapi.exception.ConceptNotExistException; import org.ohdsi.webapi.job.JobExecutionResource; import org.ohdsi.webapi.job.JobTemplate; import org.ohdsi.webapi.service.ConceptSetService; @@ -192,11 +191,8 @@ public DocumentReader(String sourceKey, List conceptSets) { public ConceptDocuments read() throws Exception { if (iterator.hasNext()) { ConceptSet conceptSet = iterator.next(); - final ConceptSetExpression csExpression; - - try { - csExpression = conceptSetService.getConceptSetExpression(conceptSet.getId()); - } catch (final ConceptNotExistException e) { + final ConceptSetExpression csExpression = conceptSetService.getConceptSetExpressionOrNull(conceptSet.getId()); + if (csExpression == null) { // data source does not contain required concepts, skip CS return new ConceptDocuments(); } diff --git a/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java 
b/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java index 652ac56273..abe220b819 100644 --- a/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java +++ b/src/main/java/org/ohdsi/webapi/service/ConceptSetService.java @@ -267,6 +267,16 @@ public ConceptSetExpression getConceptSetExpression(@PathParam("id") final int i return getConceptSetExpression(id, null, source.getSourceInfo()); } + @Transactional(dontRollbackOn = ConceptNotExistException.class) + public ConceptSetExpression getConceptSetExpressionOrNull(final int id) { + SourceInfo sourceInfo = sourceService.getPriorityVocabularySourceInfo(); + try { + return getConceptSetExpression(id, null, sourceInfo); + } catch (ConceptNotExistException e) { + return null; + } + } + private ConceptSetExpression getConceptSetExpression(int id, Integer version, SourceInfo sourceInfo) { HashMap map = new HashMap<>(); From f90e10ff107a092ada799903bac08db0d1c4aed5 Mon Sep 17 00:00:00 2001 From: ssuvorov-fls Date: Fri, 9 Dec 2022 11:33:41 +0300 Subject: [PATCH 22/27] solr functionality was moved to separate location --- .../HeraclesConfigurationInfo.java | 2 +- .../ConfigurationInfo.java | 2 +- .../extcommon/vocabulary/SearchProvider.java | 12 ++++ .../org/ohdsi/webapi/info/InfoService.java | 1 + .../person/PersonConfigurationInfo.java | 2 +- .../plugins/PluginsConfigurationInfo.java | 2 +- .../security/SecurityConfigurationInfo.java | 2 +- .../vocabulary/DatabaseSearchProvider.java | 18 ++++-- .../webapi/vocabulary/SearchProvider.java | 8 --- .../vocabulary/VocabularySearchService.java | 2 + .../VocabularySearchServiceImpl.java | 53 ++++----------- .../SolrConfigurationInfo.java} | 19 +++--- .../{ => solr}/SolrSearchClient.java | 3 +- .../{ => solr}/SolrSearchProvider.java | 64 ++++++++++++------- 14 files changed, 98 insertions(+), 92 deletions(-) rename src/main/java/org/ohdsi/webapi/{info => extcommon}/ConfigurationInfo.java (88%) create mode 100644
src/main/java/org/ohdsi/webapi/extcommon/vocabulary/SearchProvider.java delete mode 100644 src/main/java/org/ohdsi/webapi/vocabulary/SearchProvider.java rename src/main/java/org/ohdsi/webapi/vocabulary/{VocabularyConfigurationInfo.java => solr/SolrConfigurationInfo.java} (55%) rename src/main/java/org/ohdsi/webapi/vocabulary/{ => solr}/SolrSearchClient.java (97%) rename src/main/java/org/ohdsi/webapi/vocabulary/{ => solr}/SolrSearchProvider.java (82%) diff --git a/src/main/java/org/ohdsi/webapi/cohortanalysis/HeraclesConfigurationInfo.java b/src/main/java/org/ohdsi/webapi/cohortanalysis/HeraclesConfigurationInfo.java index 76c91d0f4b..69dfd1d7cd 100644 --- a/src/main/java/org/ohdsi/webapi/cohortanalysis/HeraclesConfigurationInfo.java +++ b/src/main/java/org/ohdsi/webapi/cohortanalysis/HeraclesConfigurationInfo.java @@ -1,6 +1,6 @@ package org.ohdsi.webapi.cohortanalysis; -import org.ohdsi.webapi.info.ConfigurationInfo; +import org.ohdsi.webapi.extcommon.ConfigurationInfo; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; diff --git a/src/main/java/org/ohdsi/webapi/info/ConfigurationInfo.java b/src/main/java/org/ohdsi/webapi/extcommon/ConfigurationInfo.java similarity index 88% rename from src/main/java/org/ohdsi/webapi/info/ConfigurationInfo.java rename to src/main/java/org/ohdsi/webapi/extcommon/ConfigurationInfo.java index 1068fc1072..b744af9b93 100644 --- a/src/main/java/org/ohdsi/webapi/info/ConfigurationInfo.java +++ b/src/main/java/org/ohdsi/webapi/extcommon/ConfigurationInfo.java @@ -1,4 +1,4 @@ -package org.ohdsi.webapi.info; +package org.ohdsi.webapi.extcommon; import java.util.HashMap; import java.util.Map; diff --git a/src/main/java/org/ohdsi/webapi/extcommon/vocabulary/SearchProvider.java b/src/main/java/org/ohdsi/webapi/extcommon/vocabulary/SearchProvider.java new file mode 100644 index 0000000000..dd538a3b97 --- /dev/null +++ 
b/src/main/java/org/ohdsi/webapi/extcommon/vocabulary/SearchProvider.java @@ -0,0 +1,12 @@ +package org.ohdsi.webapi.extcommon.vocabulary; + +import org.ohdsi.webapi.vocabulary.Concept; +import org.ohdsi.webapi.vocabulary.SearchProviderConfig; + +import java.util.Collection; + +public interface SearchProvider { + boolean supports(String vocabularyVersionKey); + int getPriority(); + Collection executeSearch(SearchProviderConfig config, String query, String rows) throws Exception; +} diff --git a/src/main/java/org/ohdsi/webapi/info/InfoService.java b/src/main/java/org/ohdsi/webapi/info/InfoService.java index 732cceec42..d545fc36bd 100644 --- a/src/main/java/org/ohdsi/webapi/info/InfoService.java +++ b/src/main/java/org/ohdsi/webapi/info/InfoService.java @@ -24,6 +24,7 @@ import javax.ws.rs.core.MediaType; import org.apache.commons.lang3.StringUtils; +import org.ohdsi.webapi.extcommon.ConfigurationInfo; import org.springframework.boot.info.BuildProperties; import org.springframework.stereotype.Controller; diff --git a/src/main/java/org/ohdsi/webapi/person/PersonConfigurationInfo.java b/src/main/java/org/ohdsi/webapi/person/PersonConfigurationInfo.java index 355c094245..5a65308a0f 100644 --- a/src/main/java/org/ohdsi/webapi/person/PersonConfigurationInfo.java +++ b/src/main/java/org/ohdsi/webapi/person/PersonConfigurationInfo.java @@ -1,6 +1,6 @@ package org.ohdsi.webapi.person; -import org.ohdsi.webapi.info.ConfigurationInfo; +import org.ohdsi.webapi.extcommon.ConfigurationInfo; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; diff --git a/src/main/java/org/ohdsi/webapi/plugins/PluginsConfigurationInfo.java b/src/main/java/org/ohdsi/webapi/plugins/PluginsConfigurationInfo.java index d063f74044..b84ef07a0a 100644 --- a/src/main/java/org/ohdsi/webapi/plugins/PluginsConfigurationInfo.java +++ b/src/main/java/org/ohdsi/webapi/plugins/PluginsConfigurationInfo.java @@ -1,6 +1,6 @@ package org.ohdsi.webapi.plugins; 
-import org.ohdsi.webapi.info.ConfigurationInfo; +import org.ohdsi.webapi.extcommon.ConfigurationInfo; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; diff --git a/src/main/java/org/ohdsi/webapi/security/SecurityConfigurationInfo.java b/src/main/java/org/ohdsi/webapi/security/SecurityConfigurationInfo.java index 5416645897..1ee902ee64 100644 --- a/src/main/java/org/ohdsi/webapi/security/SecurityConfigurationInfo.java +++ b/src/main/java/org/ohdsi/webapi/security/SecurityConfigurationInfo.java @@ -1,9 +1,9 @@ package org.ohdsi.webapi.security; import org.ohdsi.webapi.Constants; -import org.ohdsi.webapi.info.ConfigurationInfo; import org.ohdsi.webapi.shiro.management.AtlasRegularSecurity; import org.ohdsi.webapi.shiro.management.Security; +import org.ohdsi.webapi.extcommon.ConfigurationInfo; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; diff --git a/src/main/java/org/ohdsi/webapi/vocabulary/DatabaseSearchProvider.java b/src/main/java/org/ohdsi/webapi/vocabulary/DatabaseSearchProvider.java index 9f0e3d2936..6586fcc8ce 100644 --- a/src/main/java/org/ohdsi/webapi/vocabulary/DatabaseSearchProvider.java +++ b/src/main/java/org/ohdsi/webapi/vocabulary/DatabaseSearchProvider.java @@ -1,25 +1,33 @@ package org.ohdsi.webapi.vocabulary; import java.util.Collection; -import java.util.Objects; + import org.ohdsi.webapi.service.VocabularyService; import org.ohdsi.webapi.util.PreparedStatementRenderer; +import org.ohdsi.webapi.extcommon.vocabulary.SearchProvider; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; @Component public class DatabaseSearchProvider implements SearchProvider { + private final static int VOCABULARY_PRIORITY = Integer.MAX_VALUE; + @Autowired VocabularyService vocabService; @Override - public boolean supports(VocabularySearchProviderType type) { - return Objects.equals(type, 
VocabularySearchProviderType.DATABASE); + public boolean supports(String vocabularyVersionKey) { + return true; } - + + @Override + public int getPriority() { + return VOCABULARY_PRIORITY; + } + @Override public Collection executeSearch(SearchProviderConfig config, String query, String rows) throws Exception { - PreparedStatementRenderer psr = vocabService.prepareExecuteSearchWithQuery(query, config.getSource()); + PreparedStatementRenderer psr = vocabService.prepareExecuteSearchWithQuery(query, config.getSource()); return vocabService.getSourceJdbcTemplate(config.getSource()).query(psr.getSql(), psr.getSetter(), vocabService.getRowMapper()); } } diff --git a/src/main/java/org/ohdsi/webapi/vocabulary/SearchProvider.java b/src/main/java/org/ohdsi/webapi/vocabulary/SearchProvider.java deleted file mode 100644 index 78131bca82..0000000000 --- a/src/main/java/org/ohdsi/webapi/vocabulary/SearchProvider.java +++ /dev/null @@ -1,8 +0,0 @@ -package org.ohdsi.webapi.vocabulary; - -import java.util.Collection; - -public interface SearchProvider { - public abstract boolean supports(VocabularySearchProviderType type); - public abstract Collection executeSearch(SearchProviderConfig config, String query, String rows) throws Exception; -} diff --git a/src/main/java/org/ohdsi/webapi/vocabulary/VocabularySearchService.java b/src/main/java/org/ohdsi/webapi/vocabulary/VocabularySearchService.java index d1866043d0..a6036799d4 100644 --- a/src/main/java/org/ohdsi/webapi/vocabulary/VocabularySearchService.java +++ b/src/main/java/org/ohdsi/webapi/vocabulary/VocabularySearchService.java @@ -1,5 +1,7 @@ package org.ohdsi.webapi.vocabulary; +import org.ohdsi.webapi.extcommon.vocabulary.SearchProvider; + public interface VocabularySearchService { SearchProvider getSearchProvider(SearchProviderConfig config); } diff --git a/src/main/java/org/ohdsi/webapi/vocabulary/VocabularySearchServiceImpl.java b/src/main/java/org/ohdsi/webapi/vocabulary/VocabularySearchServiceImpl.java index 
3841dd911a..b904664f15 100644 --- a/src/main/java/org/ohdsi/webapi/vocabulary/VocabularySearchServiceImpl.java +++ b/src/main/java/org/ohdsi/webapi/vocabulary/VocabularySearchServiceImpl.java @@ -1,57 +1,30 @@ package org.ohdsi.webapi.vocabulary; -import java.util.HashSet; -import java.util.List; -import javax.annotation.PostConstruct; -import org.ohdsi.webapi.service.VocabularyService; +import java.util.Arrays; +import java.util.Comparator; +import org.ohdsi.webapi.extcommon.vocabulary.SearchProvider; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; @Service public class VocabularySearchServiceImpl implements VocabularySearchService { protected final Logger log = LoggerFactory.getLogger(getClass()); - private static HashSet availableVocabularyFullTextIndices = new HashSet<>(); - private final List searchProviderList; private static final String NO_PROVIDER_ERROR = "There is no vocabulary search provider which for sourceKey: %s"; - - @Autowired - VocabularyService vocabService; - - @Autowired - SolrSearchClient solrSearchClient; - - @PostConstruct - protected void init() { - // Get the SOLR cores list if enabled - if (solrSearchClient.enabled()) { - try { - availableVocabularyFullTextIndices = solrSearchClient.getCores(); - } catch (Exception ex) { - log.error("SOLR Core Initialization Error: WebAPI was unable to obtain the list of available cores.", ex); - } - } - } - - public VocabularySearchServiceImpl(List searchProviderList) { - this.searchProviderList = searchProviderList; + + private final SearchProvider[] searchProviders; + + public VocabularySearchServiceImpl(SearchProvider[] searchProviders) { + this.searchProviders = searchProviders; } - + @Override public SearchProvider getSearchProvider(SearchProviderConfig config) { - VocabularySearchProviderType type = VocabularySearchProviderType.DATABASE; - if 
(availableVocabularyFullTextIndices.contains(config.getVersionKey())) { - type = VocabularySearchProviderType.SOLR; - } - return selectSearchProvider(type, config); - } - - private SearchProvider selectSearchProvider(VocabularySearchProviderType type, SearchProviderConfig config) { - return searchProviderList.stream() - .filter(p -> p.supports(type)) + return Arrays.stream(searchProviders) + .sorted(Comparator.comparingInt(SearchProvider::getPriority)) + .filter(p -> p.supports(config.getVersionKey())) .findFirst() - .orElseThrow(() -> new RuntimeException(String.format(NO_PROVIDER_ERROR, config.getSource().getSourceKey()))); + .orElseThrow(() -> new RuntimeException(String.format(NO_PROVIDER_ERROR, config.getSource().getSourceKey()))); } } diff --git a/src/main/java/org/ohdsi/webapi/vocabulary/VocabularyConfigurationInfo.java b/src/main/java/org/ohdsi/webapi/vocabulary/solr/SolrConfigurationInfo.java similarity index 55% rename from src/main/java/org/ohdsi/webapi/vocabulary/VocabularyConfigurationInfo.java rename to src/main/java/org/ohdsi/webapi/vocabulary/solr/SolrConfigurationInfo.java index 3e4d78ea19..c9a6ef563e 100644 --- a/src/main/java/org/ohdsi/webapi/vocabulary/VocabularyConfigurationInfo.java +++ b/src/main/java/org/ohdsi/webapi/vocabulary/solr/SolrConfigurationInfo.java @@ -1,22 +1,22 @@ -package org.ohdsi.webapi.vocabulary; +package org.ohdsi.webapi.vocabulary.solr; -import java.util.List; -import java.util.stream.Collectors; -import org.ohdsi.webapi.info.ConfigurationInfo; +import org.ohdsi.webapi.extcommon.ConfigurationInfo; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; -@Component -public class VocabularyConfigurationInfo extends ConfigurationInfo { +import java.util.ArrayList; +import java.util.List; +@Component +public class SolrConfigurationInfo extends ConfigurationInfo { private static final String KEY = "vocabulary"; @Autowired - public 
VocabularyConfigurationInfo(SolrSearchClient solrSearchClient) { - properties.put("solrEnabled", solrSearchClient.enabled()); + public SolrConfigurationInfo(SolrSearchClient solrSearchClient) { + properties.put("solrEnabled", true); if (solrSearchClient.enabled()) { try { - List cores = solrSearchClient.getCores().stream().collect(Collectors.toList()); + List cores = new ArrayList<>(solrSearchClient.getCores()); properties.put("cores", cores); } catch (Exception e) { properties.put("cores", "unable to retrieve from endpoint."); @@ -26,7 +26,6 @@ public VocabularyConfigurationInfo(SolrSearchClient solrSearchClient) { @Override public String getKey() { - return KEY; } } diff --git a/src/main/java/org/ohdsi/webapi/vocabulary/SolrSearchClient.java b/src/main/java/org/ohdsi/webapi/vocabulary/solr/SolrSearchClient.java similarity index 97% rename from src/main/java/org/ohdsi/webapi/vocabulary/SolrSearchClient.java rename to src/main/java/org/ohdsi/webapi/vocabulary/solr/SolrSearchClient.java index e8833dd07b..6170c9f3f8 100644 --- a/src/main/java/org/ohdsi/webapi/vocabulary/SolrSearchClient.java +++ b/src/main/java/org/ohdsi/webapi/vocabulary/solr/SolrSearchClient.java @@ -1,9 +1,8 @@ -package org.ohdsi.webapi.vocabulary; +package org.ohdsi.webapi.vocabulary.solr; import java.util.Arrays; import java.util.HashSet; import java.util.List; -import java.util.regex.Pattern; import org.apache.commons.lang.StringUtils; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.impl.HttpSolrClient; diff --git a/src/main/java/org/ohdsi/webapi/vocabulary/SolrSearchProvider.java b/src/main/java/org/ohdsi/webapi/vocabulary/solr/SolrSearchProvider.java similarity index 82% rename from src/main/java/org/ohdsi/webapi/vocabulary/SolrSearchProvider.java rename to src/main/java/org/ohdsi/webapi/vocabulary/solr/SolrSearchProvider.java index 98cb1b0e33..1c08142b24 100644 --- a/src/main/java/org/ohdsi/webapi/vocabulary/SolrSearchProvider.java +++ 
b/src/main/java/org/ohdsi/webapi/vocabulary/solr/SolrSearchProvider.java @@ -1,17 +1,9 @@ -package org.ohdsi.webapi.vocabulary; +package org.ohdsi.webapi.vocabulary.solr; -import java.io.IOException; -import java.util.Date; -import java.text.ParseException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Objects; import org.apache.commons.beanutils.ConvertUtils; import org.apache.commons.lang3.math.NumberUtils; import org.apache.commons.lang3.time.DateFormatUtils; import org.apache.commons.lang3.time.DateUtils; -import org.springframework.stereotype.Component; - import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServerException; @@ -19,22 +11,50 @@ import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; +import org.ohdsi.webapi.vocabulary.Concept; +import org.ohdsi.webapi.vocabulary.SearchProviderConfig; +import org.ohdsi.webapi.extcommon.vocabulary.SearchProvider; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Component; + +import javax.annotation.PostConstruct; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Date; +import java.util.HashSet; @Component public class SolrSearchProvider implements SearchProvider { protected final Logger log = LoggerFactory.getLogger(getClass()); - + + private static final int SOLR_PRIORITY = 1000; + private static HashSet solrCores = new HashSet<>(); + @Autowired SolrSearchClient solrSearchClient; - + + @PostConstruct + protected void init() { + try { + solrCores = solrSearchClient.getCores(); + } catch (Exception ex) { + log.error("SOLR Core Initialization Error: WebAPI was unable to obtain the list of available cores.", ex); + } + } + @Override - public 
boolean supports(VocabularySearchProviderType type) { - return Objects.equals(type, VocabularySearchProviderType.SOLR); + public boolean supports(String vocabularyVersionKey) { + return solrCores.contains(vocabularyVersionKey); } - + + @Override + public int getPriority() { + return SOLR_PRIORITY; + } + @Override public Collection executeSearch(SearchProviderConfig config, String query, String rows) throws IOException, SolrServerException { ArrayList concepts = new ArrayList<>(); @@ -45,7 +65,7 @@ public Collection executeSearch(SearchProviderConfig config, String que QueryResponse response; q.setStart(0); q.setRows(Integer.parseInt(rows)); - Boolean solrSearchError = false; + boolean solrSearchError = false; try { q.setQuery(solrSearchClient.formatSearchQuery(query)); response = client.query(q); @@ -56,7 +76,7 @@ public Collection executeSearch(SearchProviderConfig config, String que log.error("SOLR Search Query: \"" + query + "\" failed with message: " + rse.getMessage()); solrSearchError = true; } - + // If we did not receive results from issuing the initial wildcard // query OR there was an exception usually due to a maxBooleanClause // violation from doing a wildcard search on a very common term, then @@ -66,7 +86,7 @@ public Collection executeSearch(SearchProviderConfig config, String que response = client.query(q); results = response.getResults(); } - + for (int i = 0; i < results.size(); ++i) { SolrDocument d = results.get(i); Concept c = new Concept(); @@ -81,15 +101,15 @@ public Collection executeSearch(SearchProviderConfig config, String que c.validStartDate = convertObjectToDate(d.getFieldValue("valid_start_date")); c.validEndDate = convertObjectToDate(d.getFieldValue("valid_end_date")); concepts.add(c); - } - + } + return concepts; } - + protected String convertObjectToString(Object obj) { return convertObjectToString(obj, null); } - + protected String convertObjectToString(Object obj, String defaultValue) { String returnVal = 
ConvertUtils.convert(obj); if (defaultValue != null && returnVal == null) { @@ -97,7 +117,7 @@ protected String convertObjectToString(Object obj, String defaultValue) { } return returnVal; } - + protected Long convertObjectToLong(Object obj) { return NumberUtils.createLong(ConvertUtils.convert(obj)); } From 8008322240e475d9ddf3248e76cffc3fa6b9fb9e Mon Sep 17 00:00:00 2001 From: ssuvorov-fls Date: Tue, 13 Dec 2022 13:10:38 +0300 Subject: [PATCH 23/27] solr functionality was moved to separate location --- .../java/org/ohdsi/webapi/extcommon/ConfigurationInfo.java | 2 -- .../ohdsi/webapi/extcommon/vocabulary/SearchProvider.java | 2 ++ .../webapi/vocabulary/VocabularySearchProviderType.java | 6 ------ 3 files changed, 2 insertions(+), 8 deletions(-) delete mode 100644 src/main/java/org/ohdsi/webapi/vocabulary/VocabularySearchProviderType.java diff --git a/src/main/java/org/ohdsi/webapi/extcommon/ConfigurationInfo.java b/src/main/java/org/ohdsi/webapi/extcommon/ConfigurationInfo.java index b744af9b93..48efe7d78b 100644 --- a/src/main/java/org/ohdsi/webapi/extcommon/ConfigurationInfo.java +++ b/src/main/java/org/ohdsi/webapi/extcommon/ConfigurationInfo.java @@ -4,13 +4,11 @@ import java.util.Map; public abstract class ConfigurationInfo { - protected final Map properties = new HashMap<>(); public abstract String getKey(); public Map getProperties() { - return properties; } } diff --git a/src/main/java/org/ohdsi/webapi/extcommon/vocabulary/SearchProvider.java b/src/main/java/org/ohdsi/webapi/extcommon/vocabulary/SearchProvider.java index dd538a3b97..db742a8a48 100644 --- a/src/main/java/org/ohdsi/webapi/extcommon/vocabulary/SearchProvider.java +++ b/src/main/java/org/ohdsi/webapi/extcommon/vocabulary/SearchProvider.java @@ -7,6 +7,8 @@ public interface SearchProvider { boolean supports(String vocabularyVersionKey); + int getPriority(); + Collection executeSearch(SearchProviderConfig config, String query, String rows) throws Exception; } diff --git
a/src/main/java/org/ohdsi/webapi/vocabulary/VocabularySearchProviderType.java b/src/main/java/org/ohdsi/webapi/vocabulary/VocabularySearchProviderType.java deleted file mode 100644 index fe1205bf68..0000000000 --- a/src/main/java/org/ohdsi/webapi/vocabulary/VocabularySearchProviderType.java +++ /dev/null @@ -1,6 +0,0 @@ -package org.ohdsi.webapi.vocabulary; - -public enum VocabularySearchProviderType { - DATABASE, - SOLR -} From 31698e70b06bb1acfbd4b1d9a33a63e4e2add570 Mon Sep 17 00:00:00 2001 From: Anton Abushkevich Date: Tue, 13 Dec 2022 16:00:37 +0300 Subject: [PATCH 24/27] Concept sets search - build fix & add maxResults parameter --- pom.xml | 1 + .../webapi/conceptset/search/ConceptSetSearchService.java | 6 +++++- src/main/resources/application.properties | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 78759b9e3b..7a3a40ce6a 100644 --- a/pom.xml +++ b/pom.xml @@ -71,6 +71,7 @@ {!complexphrase inOrder=true} 8.11.2 webapi-conceptsets + 1000 5 diff --git a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java index 477476b4bd..14a4b83755 100644 --- a/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java +++ b/src/main/java/org/ohdsi/webapi/conceptset/search/ConceptSetSearchService.java @@ -6,7 +6,7 @@ import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.common.SolrInputDocument; import org.ohdsi.webapi.service.dto.ConceptSetSearchDTO; -import org.ohdsi.webapi.vocabulary.SolrSearchClient; +import org.ohdsi.webapi.vocabulary.solr.SolrSearchClient; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -24,6 +24,9 @@ public class ConceptSetSearchService { @Value("${solr.conceptsets.core}") private String conceptSetsCore; + @Value("${solr.conceptsets.maxResults}") + private Integer maxResults; + @Autowired private 
SolrSearchClient solrSearchClient; @@ -44,6 +47,7 @@ public Set searchConceptSets(final ConceptSetSearchDTO dto) { try { final SolrQuery q = new SolrQuery(); q.setQuery(composeSearchQuery(dto)); + q.setRows(maxResults); q.add("group", "true"); q.add("group.field", "concept_set_id"); diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index be94cb4497..c924949601 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -97,6 +97,7 @@ security.cas.casticket=${security.cas.casticket} solr.endpoint = ${solr.endpoint} solr.query.prefix = ${solr.query.prefix} solr.conceptsets.core = ${solr.conceptsets.core} +solr.conceptsets.maxResults = ${solr.conceptsets.maxResults} # Enabling Compression compression=on compressableMimeType=application/json,application/xml,text/html,text/xml,text/plain From a2b19fc908f9d2779878b97421e81dd7f0715735 Mon Sep 17 00:00:00 2001 From: ssuvorov-fls Date: Wed, 8 Feb 2023 14:30:57 +0300 Subject: [PATCH 25/27] cherry pick from solr_refactoring --- .../HeraclesConfigurationInfo.java | 2 +- .../{extcommon => info}/ConfigurationInfo.java | 2 +- .../org/ohdsi/webapi/info/InfoService.java | 2 +- .../webapi/person/PersonConfigurationInfo.java | 2 +- .../plugins/PluginsConfigurationInfo.java | 2 +- .../security/SecurityConfigurationInfo.java | 2 +- .../webapi/service/VocabularyService.java | 3 ++- .../vocabulary/DatabaseSearchProvider.java | 12 +++++++++--- .../vocabulary/SearchProvider.java | 8 +++----- .../vocabulary/SearchProviderConfig.java | 18 +++++++----------- .../vocabulary/VocabularySearchService.java | 2 -- .../VocabularySearchServiceImpl.java | 4 ++-- .../vocabulary/solr/SolrConfigurationInfo.java | 2 +- .../vocabulary/solr/SolrSearchProvider.java | 4 ++-- 14 files changed, 32 insertions(+), 33 deletions(-) rename src/main/java/org/ohdsi/webapi/{extcommon => info}/ConfigurationInfo.java (88%) rename src/main/java/org/ohdsi/webapi/{extcommon => 
}/vocabulary/SearchProvider.java (51%) diff --git a/src/main/java/org/ohdsi/webapi/cohortanalysis/HeraclesConfigurationInfo.java b/src/main/java/org/ohdsi/webapi/cohortanalysis/HeraclesConfigurationInfo.java index 69dfd1d7cd..76c91d0f4b 100644 --- a/src/main/java/org/ohdsi/webapi/cohortanalysis/HeraclesConfigurationInfo.java +++ b/src/main/java/org/ohdsi/webapi/cohortanalysis/HeraclesConfigurationInfo.java @@ -1,6 +1,6 @@ package org.ohdsi.webapi.cohortanalysis; -import org.ohdsi.webapi.extcommon.ConfigurationInfo; +import org.ohdsi.webapi.info.ConfigurationInfo; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; diff --git a/src/main/java/org/ohdsi/webapi/extcommon/ConfigurationInfo.java b/src/main/java/org/ohdsi/webapi/info/ConfigurationInfo.java similarity index 88% rename from src/main/java/org/ohdsi/webapi/extcommon/ConfigurationInfo.java rename to src/main/java/org/ohdsi/webapi/info/ConfigurationInfo.java index 48efe7d78b..64d8817597 100644 --- a/src/main/java/org/ohdsi/webapi/extcommon/ConfigurationInfo.java +++ b/src/main/java/org/ohdsi/webapi/info/ConfigurationInfo.java @@ -1,4 +1,4 @@ -package org.ohdsi.webapi.extcommon; +package org.ohdsi.webapi.info; import java.util.HashMap; import java.util.Map; diff --git a/src/main/java/org/ohdsi/webapi/info/InfoService.java b/src/main/java/org/ohdsi/webapi/info/InfoService.java index d545fc36bd..0613bb5e68 100644 --- a/src/main/java/org/ohdsi/webapi/info/InfoService.java +++ b/src/main/java/org/ohdsi/webapi/info/InfoService.java @@ -24,7 +24,7 @@ import javax.ws.rs.core.MediaType; import org.apache.commons.lang3.StringUtils; -import org.ohdsi.webapi.extcommon.ConfigurationInfo; +import org.ohdsi.webapi.info.ConfigurationInfo; import org.springframework.boot.info.BuildProperties; import org.springframework.stereotype.Controller; diff --git a/src/main/java/org/ohdsi/webapi/person/PersonConfigurationInfo.java 
b/src/main/java/org/ohdsi/webapi/person/PersonConfigurationInfo.java index 5a65308a0f..355c094245 100644 --- a/src/main/java/org/ohdsi/webapi/person/PersonConfigurationInfo.java +++ b/src/main/java/org/ohdsi/webapi/person/PersonConfigurationInfo.java @@ -1,6 +1,6 @@ package org.ohdsi.webapi.person; -import org.ohdsi.webapi.extcommon.ConfigurationInfo; +import org.ohdsi.webapi.info.ConfigurationInfo; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; diff --git a/src/main/java/org/ohdsi/webapi/plugins/PluginsConfigurationInfo.java b/src/main/java/org/ohdsi/webapi/plugins/PluginsConfigurationInfo.java index b84ef07a0a..d063f74044 100644 --- a/src/main/java/org/ohdsi/webapi/plugins/PluginsConfigurationInfo.java +++ b/src/main/java/org/ohdsi/webapi/plugins/PluginsConfigurationInfo.java @@ -1,6 +1,6 @@ package org.ohdsi.webapi.plugins; -import org.ohdsi.webapi.extcommon.ConfigurationInfo; +import org.ohdsi.webapi.info.ConfigurationInfo; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; diff --git a/src/main/java/org/ohdsi/webapi/security/SecurityConfigurationInfo.java b/src/main/java/org/ohdsi/webapi/security/SecurityConfigurationInfo.java index 1ee902ee64..36d9274e7e 100644 --- a/src/main/java/org/ohdsi/webapi/security/SecurityConfigurationInfo.java +++ b/src/main/java/org/ohdsi/webapi/security/SecurityConfigurationInfo.java @@ -3,7 +3,7 @@ import org.ohdsi.webapi.Constants; import org.ohdsi.webapi.shiro.management.AtlasRegularSecurity; import org.ohdsi.webapi.shiro.management.Security; -import org.ohdsi.webapi.extcommon.ConfigurationInfo; +import org.ohdsi.webapi.info.ConfigurationInfo; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; diff --git a/src/main/java/org/ohdsi/webapi/service/VocabularyService.java b/src/main/java/org/ohdsi/webapi/service/VocabularyService.java index 
7864a78842..aa437ce3a0 100644 --- a/src/main/java/org/ohdsi/webapi/service/VocabularyService.java +++ b/src/main/java/org/ohdsi/webapi/service/VocabularyService.java @@ -660,7 +660,8 @@ public Collection executeSearch(@PathParam("sourceKey") String sourceKe try { Source source = getSourceRepository().findBySourceKey(sourceKey); VocabularyInfo vocabularyInfo = getInfo(sourceKey); - SearchProviderConfig searchConfig = new SearchProviderConfig(source, vocabularyInfo); + String versionKey = vocabularyInfo.version.replace(' ', '_'); + SearchProviderConfig searchConfig = new SearchProviderConfig(source.getSourceKey(), versionKey); concepts = vocabSearchService.getSearchProvider(searchConfig).executeSearch(searchConfig, query, rows); } catch (Exception ex) { log.error("An error occurred during the vocabulary search", ex); diff --git a/src/main/java/org/ohdsi/webapi/vocabulary/DatabaseSearchProvider.java b/src/main/java/org/ohdsi/webapi/vocabulary/DatabaseSearchProvider.java index 6586fcc8ce..a25da3215b 100644 --- a/src/main/java/org/ohdsi/webapi/vocabulary/DatabaseSearchProvider.java +++ b/src/main/java/org/ohdsi/webapi/vocabulary/DatabaseSearchProvider.java @@ -3,13 +3,17 @@ import java.util.Collection; import org.ohdsi.webapi.service.VocabularyService; +import org.ohdsi.webapi.source.Source; +import org.ohdsi.webapi.source.SourceRepository; import org.ohdsi.webapi.util.PreparedStatementRenderer; -import org.ohdsi.webapi.extcommon.vocabulary.SearchProvider; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; @Component public class DatabaseSearchProvider implements SearchProvider { + @Autowired + private SourceRepository sourceRepository; + private final static int VOCABULARY_PRIORITY = Integer.MAX_VALUE; @Autowired @@ -27,7 +31,9 @@ public int getPriority() { @Override public Collection executeSearch(SearchProviderConfig config, String query, String rows) throws Exception { - PreparedStatementRenderer psr = 
vocabService.prepareExecuteSearchWithQuery(query, config.getSource()); - return vocabService.getSourceJdbcTemplate(config.getSource()).query(psr.getSql(), psr.getSetter(), vocabService.getRowMapper()); + Source source = sourceRepository.findBySourceKey(config.getSourceKey()); + + PreparedStatementRenderer psr = vocabService.prepareExecuteSearchWithQuery(query, source); + return vocabService.getSourceJdbcTemplate(source).query(psr.getSql(), psr.getSetter(), vocabService.getRowMapper()); } } diff --git a/src/main/java/org/ohdsi/webapi/extcommon/vocabulary/SearchProvider.java b/src/main/java/org/ohdsi/webapi/vocabulary/SearchProvider.java similarity index 51% rename from src/main/java/org/ohdsi/webapi/extcommon/vocabulary/SearchProvider.java rename to src/main/java/org/ohdsi/webapi/vocabulary/SearchProvider.java index db742a8a48..186b207cff 100644 --- a/src/main/java/org/ohdsi/webapi/extcommon/vocabulary/SearchProvider.java +++ b/src/main/java/org/ohdsi/webapi/vocabulary/SearchProvider.java @@ -1,14 +1,12 @@ -package org.ohdsi.webapi.extcommon.vocabulary; +package org.ohdsi.webapi.vocabulary; import org.ohdsi.webapi.vocabulary.Concept; import org.ohdsi.webapi.vocabulary.SearchProviderConfig; import java.util.Collection; -public interface SearchProvider { +public interface SearchProvider { boolean supports(String vocabularyVersionKey); - int getPriority(); - - Collection executeSearch(SearchProviderConfig config, String query, String rows) throws Exception; + Collection executeSearch(T config, String query, String rows) throws Exception; } diff --git a/src/main/java/org/ohdsi/webapi/vocabulary/SearchProviderConfig.java b/src/main/java/org/ohdsi/webapi/vocabulary/SearchProviderConfig.java index 962dfd5ace..3981645925 100644 --- a/src/main/java/org/ohdsi/webapi/vocabulary/SearchProviderConfig.java +++ b/src/main/java/org/ohdsi/webapi/vocabulary/SearchProviderConfig.java @@ -1,23 +1,19 @@ package org.ohdsi.webapi.vocabulary; -import org.ohdsi.webapi.source.Source; - 
public class SearchProviderConfig { - protected Source source; - protected VocabularyInfo vocabularyInfo; - protected String versionKey; + private String sourceKey; + private String versionKey; - public SearchProviderConfig(Source source, VocabularyInfo vocabularyInfo) { - this.source = source; - this.vocabularyInfo = vocabularyInfo; - this.versionKey = vocabularyInfo.version.replace(' ', '_'); + public SearchProviderConfig(String sourceKey, String versionKey) { + this.sourceKey = sourceKey; + this.versionKey = versionKey; } public String getVersionKey() { return versionKey; } - public Source getSource() { - return source; + public String getSourceKey() { + return sourceKey; } } diff --git a/src/main/java/org/ohdsi/webapi/vocabulary/VocabularySearchService.java b/src/main/java/org/ohdsi/webapi/vocabulary/VocabularySearchService.java index a6036799d4..d1866043d0 100644 --- a/src/main/java/org/ohdsi/webapi/vocabulary/VocabularySearchService.java +++ b/src/main/java/org/ohdsi/webapi/vocabulary/VocabularySearchService.java @@ -1,7 +1,5 @@ package org.ohdsi.webapi.vocabulary; -import org.ohdsi.webapi.extcommon.vocabulary.SearchProvider; - public interface VocabularySearchService { SearchProvider getSearchProvider(SearchProviderConfig config); } diff --git a/src/main/java/org/ohdsi/webapi/vocabulary/VocabularySearchServiceImpl.java b/src/main/java/org/ohdsi/webapi/vocabulary/VocabularySearchServiceImpl.java index b904664f15..49aa791917 100644 --- a/src/main/java/org/ohdsi/webapi/vocabulary/VocabularySearchServiceImpl.java +++ b/src/main/java/org/ohdsi/webapi/vocabulary/VocabularySearchServiceImpl.java @@ -2,7 +2,7 @@ import java.util.Arrays; import java.util.Comparator; -import org.ohdsi.webapi.extcommon.vocabulary.SearchProvider; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.stereotype.Service; @@ -25,6 +25,6 @@ public SearchProvider getSearchProvider(SearchProviderConfig config) { 
.sorted(Comparator.comparingInt(SearchProvider::getPriority)) .filter(p -> p.supports(config.getVersionKey())) .findFirst() - .orElseThrow(() -> new RuntimeException(String.format(NO_PROVIDER_ERROR, config.getSource().getSourceKey()))); + .orElseThrow(() -> new RuntimeException(String.format(NO_PROVIDER_ERROR, config.getSourceKey()))); } } diff --git a/src/main/java/org/ohdsi/webapi/vocabulary/solr/SolrConfigurationInfo.java b/src/main/java/org/ohdsi/webapi/vocabulary/solr/SolrConfigurationInfo.java index c9a6ef563e..859d0510ee 100644 --- a/src/main/java/org/ohdsi/webapi/vocabulary/solr/SolrConfigurationInfo.java +++ b/src/main/java/org/ohdsi/webapi/vocabulary/solr/SolrConfigurationInfo.java @@ -1,6 +1,6 @@ package org.ohdsi.webapi.vocabulary.solr; -import org.ohdsi.webapi.extcommon.ConfigurationInfo; +import org.ohdsi.webapi.info.ConfigurationInfo; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; diff --git a/src/main/java/org/ohdsi/webapi/vocabulary/solr/SolrSearchProvider.java b/src/main/java/org/ohdsi/webapi/vocabulary/solr/SolrSearchProvider.java index 1c08142b24..1f058ec01e 100644 --- a/src/main/java/org/ohdsi/webapi/vocabulary/solr/SolrSearchProvider.java +++ b/src/main/java/org/ohdsi/webapi/vocabulary/solr/SolrSearchProvider.java @@ -13,7 +13,7 @@ import org.apache.solr.common.SolrDocumentList; import org.ohdsi.webapi.vocabulary.Concept; import org.ohdsi.webapi.vocabulary.SearchProviderConfig; -import org.ohdsi.webapi.extcommon.vocabulary.SearchProvider; +import org.ohdsi.webapi.vocabulary.SearchProvider; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -27,7 +27,7 @@ import java.util.HashSet; @Component -public class SolrSearchProvider implements SearchProvider { +public class SolrSearchProvider implements SearchProvider { protected final Logger log = LoggerFactory.getLogger(getClass()); private static final int 
SOLR_PRIORITY = 1000; From b8459e94627370c26fdd7ed9be87fa74924b4e26 Mon Sep 17 00:00:00 2001 From: ssuvorov-fls Date: Wed, 8 Feb 2023 14:30:57 +0300 Subject: [PATCH 26/27] fixed after review --- src/main/java/org/ohdsi/webapi/vocabulary/SearchProvider.java | 4 ++-- .../org/ohdsi/webapi/vocabulary/solr/SolrSearchProvider.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/ohdsi/webapi/vocabulary/SearchProvider.java b/src/main/java/org/ohdsi/webapi/vocabulary/SearchProvider.java index 186b207cff..d733ba4d90 100644 --- a/src/main/java/org/ohdsi/webapi/vocabulary/SearchProvider.java +++ b/src/main/java/org/ohdsi/webapi/vocabulary/SearchProvider.java @@ -5,8 +5,8 @@ import java.util.Collection; -public interface SearchProvider { +public interface SearchProvider { boolean supports(String vocabularyVersionKey); int getPriority(); - Collection executeSearch(T config, String query, String rows) throws Exception; + Collection executeSearch(SearchProviderConfig config, String query, String rows) throws Exception; } diff --git a/src/main/java/org/ohdsi/webapi/vocabulary/solr/SolrSearchProvider.java b/src/main/java/org/ohdsi/webapi/vocabulary/solr/SolrSearchProvider.java index 1f058ec01e..632b541eea 100644 --- a/src/main/java/org/ohdsi/webapi/vocabulary/solr/SolrSearchProvider.java +++ b/src/main/java/org/ohdsi/webapi/vocabulary/solr/SolrSearchProvider.java @@ -27,7 +27,7 @@ import java.util.HashSet; @Component -public class SolrSearchProvider implements SearchProvider { +public class SolrSearchProvider implements SearchProvider { protected final Logger log = LoggerFactory.getLogger(getClass()); private static final int SOLR_PRIORITY = 1000; From 5e665d641ee11be0b4b3ee48e79944eaa3bde0fe Mon Sep 17 00:00:00 2001 From: ssuvorov-fls Date: Mon, 13 Feb 2023 16:36:38 +0300 Subject: [PATCH 27/27] merge master into current branch --- src/main/resources/i18n/messages_en.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/main/resources/i18n/messages_en.json b/src/main/resources/i18n/messages_en.json index 17e79a58d2..96a37585b5 100644 --- a/src/main/resources/i18n/messages_en.json +++ b/src/main/resources/i18n/messages_en.json @@ -2465,7 +2465,7 @@ "importUsers": "Import Users from LDAP/AD", "managePermissions": "Manage Permissions", "reindexCS": "Concept Sets Reindex", - "reindexCSStatus": "Concept Sets Reindex (<%=doneCount%> of <%=maxCount%>)" + "reindexCSStatus": "Concept Sets Reindex (<%=doneCount%> of <%=maxCount%>)", "tagManagement": "Tag Management" }, "changeSourcePriorities": "Change source priorities in:",