diff --git a/src/oaklib/utilities/subsets/value_set_expander.py b/src/oaklib/utilities/subsets/value_set_expander.py index ac6c2532b..29b388802 100644 --- a/src/oaklib/utilities/subsets/value_set_expander.py +++ b/src/oaklib/utilities/subsets/value_set_expander.py @@ -2,7 +2,7 @@ from abc import ABC from dataclasses import dataclass, field from pathlib import Path -from typing import Iterator, List, Union +from typing import Iterator, List, Optional, Union import click from linkml_runtime.dumpers import json_dumper @@ -43,6 +43,7 @@ def expand_value_set( value_set: Union[EnumDefinition, AnonymousEnumExpression], schema: SchemaDefinition = None, source_enum_definition: EnumDefinition = None, + pv_syntax: Optional[str] = None, ) -> Iterator[PermissibleValue]: """ Expand a value set definition into a list of curies @@ -71,7 +72,10 @@ def expand_value_set( vset = schema.enums[inherited] pvs.extend( self.expand_value_set( - vset, schema=schema, source_enum_definition=source_enum_definition + vset, + schema=schema, + source_enum_definition=source_enum_definition, + pv_syntax=pv_syntax, ) ) if value_set.include: @@ -79,14 +83,20 @@ def expand_value_set( if isinstance(include, AnonymousEnumExpression): pvs.extend( self.expand_value_set( - include, schema=schema, source_enum_definition=source_enum_definition + include, + schema=schema, + source_enum_definition=source_enum_definition, + pv_syntax=pv_syntax, ) ) else: raise ValueError(f"Unexpected type for include: {type(include)}") pvs.extend( self.expand_value_set( - include, schema=schema, source_enum_definition=source_enum_definition + include, + schema=schema, + source_enum_definition=source_enum_definition, + pv_syntax=pv_syntax, ) ) if value_set.concepts: @@ -136,7 +146,11 @@ def expand_value_set( rq.source_nodes, predicates=predicates, reflexive=rq.include_self ) for curie in results: - pvs.append(self._generate_permissible_value(curie, oi, source_enum_definition)) + pvs.append( + self._generate_permissible_value( + curie, oi, source_enum_definition, pv_syntax=pv_syntax + ) + ) else: raise NotImplementedError(f"Must be an OboGraphInterface: {type(oi)}") if value_set.minus: @@ -145,7 +159,10 @@ def expand_value_set( minus_vs, AnonymousEnumExpression ): for pv in self.expand_value_set( - minus_vs, schema=schema, source_enum_definition=source_enum_definition + minus_vs, + schema=schema, + source_enum_definition=source_enum_definition, + pv_syntax=pv_syntax, ): if pv in pvs: pvs.remove(pv) @@ -181,6 +198,7 @@ def _generate_permissible_value( curie: CURIE, oi: BasicOntologyInterface, enum_definition: EnumDefinition = None, + pv_syntax: Optional[str] = None, ) -> PermissibleValue: definition = oi.definition(curie) # \n can break some downstream tooling like LinkML's gen-pydantic (v1.6.6) @@ -188,7 +206,9 @@ def _generate_permissible_value( definition = definition.replace("\n", " ") label = oi.label(curie) pv_formula = enum_definition.pv_formula if enum_definition else None - if str(pv_formula) == "CURIE": + if pv_syntax is not None: + text = pv_syntax.format(id=curie, label=label, definition=definition) + elif str(pv_formula) == "CURIE": text = curie elif str(pv_formula) == "LABEL": # not all ontologies will have text for every element @@ -209,6 +229,7 @@ def expand_in_place( schema_path: Union[str, Path], value_set_names: List[str] = None, output_path: Union[str, Path] = None, + pv_syntax: Optional[str] = None, ) -> SchemaDefinition: """ Expand value sets in place @@ -231,7 +252,7 @@ def expand_in_place( if value_set_name not in schema.enums: raise ValueError(f"Unknown value set: {value_set_name}") value_set = schema.enums[value_set_name] - pvs = list(self.expand_value_set(value_set, schema=schema)) + pvs = list(self.expand_value_set(value_set, schema=schema, pv_syntax=pv_syntax)) yaml_obj["enums"][value_set_name]["permissible_values"] = { str(pv.text): json_dumper.to_dict(pv) for pv in pvs } @@ -259,8 +280,11 @@ def main(verbose: int, quiet: bool): @click.option("-c", "--config", type=click.Path(exists=True)) @click.option("-s", "--schema", type=click.Path(exists=True)) @click.option("-o", "--output", type=click.Path()) +@click.option("--pv-syntax", help="Enter a LinkML structured_pattern.syntax-style string ") +# # add a boolean click option with --mixs-style and (default) --no-mixs-style options +# @click.option("-m", "--mixs-style", is_flag=True, default=False) @click.argument("value_set_names", nargs=-1) -def expand(config: str, schema: str, value_set_names: List[str], output: str): +def expand(config: str, schema: str, value_set_names: List[str], output: str, pv_syntax: str): """ Expand a value set. EXPERIMENTAL. @@ -281,17 +305,19 @@ def expand(config: str, schema: str, value_set_names: List[str], output: str): such as BioPortal or Wikidata. However, note that not all backends are capable of being able to render all value sets. - Example: + Examples: ------- vskit expand -c config.yaml -s schema.yaml -o expanded.yaml my_value_set1 my_value_set2 + vskit expand -s schema.yaml -o expanded.yaml --pv-syntax '{label} [{id}] my_value_set1 + """ value_set_names = None if not value_set_names else value_set_names expander = ValueSetExpander() if config: expander.configuration = yaml_loader.load(config, target_class=ValueSetConfiguration) expander.expand_in_place( - schema_path=schema, value_set_names=value_set_names, output_path=output + schema_path=schema, value_set_names=value_set_names, output_path=output, pv_syntax=pv_syntax ) diff --git a/tests/test_pv_syntax_expander.py b/tests/test_pv_syntax_expander.py new file mode 100644 index 000000000..90f0ad581 --- /dev/null +++ b/tests/test_pv_syntax_expander.py @@ -0,0 +1,96 @@ +import contextlib +import tempfile + +from linkml.generators import PythonGenerator +from linkml_runtime import SchemaView +from linkml_runtime.dumpers import yaml_dumper +from linkml_runtime.loaders import yaml_loader +from linkml_runtime.utils.compile_python import compile_python + +from src.oaklib.utilities.subsets.value_set_expander import ValueSetExpander + +SCHEMA_STRING = """ +name: label_id_schema +id: http://example.com/label_id_schema +imports: + - linkml:types +prefixes: + label_id_schema: http://example.com/label_id_schema/ + linkml: https://w3id.org/linkml/ + ENVO: http://purl.obolibrary.org/obo/ENVO_ +default_prefix: label_id_schema +enums: + TerrestrialBiomeEnum: + minus: + - reachable_from: + source_ontology: obo:envo + source_nodes: + - ENVO:00002030 + relationship_types: + - rdfs:subClassOf + is_direct: false + reachable_from: + source_ontology: obo:envo + source_nodes: + - ENVO:00000428 + relationship_types: + - rdfs:subClassOf + is_direct: false +slots: + name: + range: string + id: + range: string + required: true + biome: + range: TerrestrialBiomeEnum +classes: + NamedThing: + slots: + - name + - id + Sample: + is_a: NamedThing + slots: + - biome +""" + +DATA_STRING = """ +name: Sample-1 +id: sample_1 +biome: tundra biome [ENVO:01000180] +""" + + +def test_pv_syntax_expander(): + view = SchemaView(SCHEMA_STRING) + + with contextlib.ExitStack() as stack: + temp_dynamic = stack.enter_context(tempfile.NamedTemporaryFile(mode="w+t", delete=False)) + temp_expanded = stack.enter_context(tempfile.NamedTemporaryFile(mode="w+t", delete=False)) + + yaml_dumper.dump(view.schema, temp_dynamic.name) + + expander = ValueSetExpander() + + expander.expand_in_place( + schema_path=temp_dynamic.name, + pv_syntax="{label} [{id}]", + output_path=temp_expanded.name, + ) + + expanded_view = SchemaView(temp_expanded.name) + + generator = PythonGenerator(expanded_view.schema) + + python_code = generator.serialize() + + module_name = "test_module" + + module = compile_python(python_code, module_name) + + Sample = module.Sample + + sample_1 = yaml_loader.loads(source=DATA_STRING, target_class=Sample) + + assert sample_1 is not None