Skip to content

Commit

Permalink
structured_format.pattern-style permissible value creation eg for MIxS (
Browse files Browse the repository at this point in the history
#785)

* crude MIxS-style permissible value illustration

* not committing expanded schema

* --pv-syntax

* imports order fixed by ruff

* remove experimental files from root

* test including exec()

* use linkml_runtime.utils.compile_python not exec()

* meaningful test function name
  • Loading branch information
turbomam committed Jul 31, 2024
1 parent 97f9047 commit 8d3d2db
Show file tree
Hide file tree
Showing 2 changed files with 133 additions and 11 deletions.
48 changes: 37 additions & 11 deletions src/oaklib/utilities/subsets/value_set_expander.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from abc import ABC
from dataclasses import dataclass, field
from pathlib import Path
from typing import Iterator, List, Union
from typing import Iterator, List, Optional, Union

import click
from linkml_runtime.dumpers import json_dumper
Expand Down Expand Up @@ -43,6 +43,7 @@ def expand_value_set(
value_set: Union[EnumDefinition, AnonymousEnumExpression],
schema: SchemaDefinition = None,
source_enum_definition: EnumDefinition = None,
pv_syntax: Optional[str] = None,
) -> Iterator[PermissibleValue]:
"""
Expand a value set definition into a list of curies
Expand Down Expand Up @@ -71,22 +72,31 @@ def expand_value_set(
vset = schema.enums[inherited]
pvs.extend(
self.expand_value_set(
vset, schema=schema, source_enum_definition=source_enum_definition
vset,
schema=schema,
source_enum_definition=source_enum_definition,
pv_syntax=pv_syntax,
)
)
if value_set.include:
for include in value_set.include:
if isinstance(include, AnonymousEnumExpression):
pvs.extend(
self.expand_value_set(
include, schema=schema, source_enum_definition=source_enum_definition
include,
schema=schema,
source_enum_definition=source_enum_definition,
pv_syntax=pv_syntax,
)
)
else:
raise ValueError(f"Unexpected type for include: {type(include)}")
pvs.extend(
self.expand_value_set(
include, schema=schema, source_enum_definition=source_enum_definition
include,
schema=schema,
source_enum_definition=source_enum_definition,
pv_syntax=pv_syntax,
)
)
if value_set.concepts:
Expand Down Expand Up @@ -136,7 +146,11 @@ def expand_value_set(
rq.source_nodes, predicates=predicates, reflexive=rq.include_self
)
for curie in results:
pvs.append(self._generate_permissible_value(curie, oi, source_enum_definition))
pvs.append(
self._generate_permissible_value(
curie, oi, source_enum_definition, pv_syntax=pv_syntax
)
)
else:
raise NotImplementedError(f"Must be an OboGraphInterface: {type(oi)}")
if value_set.minus:
Expand All @@ -145,7 +159,10 @@ def expand_value_set(
minus_vs, AnonymousEnumExpression
):
for pv in self.expand_value_set(
minus_vs, schema=schema, source_enum_definition=source_enum_definition
minus_vs,
schema=schema,
source_enum_definition=source_enum_definition,
pv_syntax=pv_syntax,
):
if pv in pvs:
pvs.remove(pv)
Expand Down Expand Up @@ -181,14 +198,17 @@ def _generate_permissible_value(
curie: CURIE,
oi: BasicOntologyInterface,
enum_definition: EnumDefinition = None,
pv_syntax: Optional[str] = None,
) -> PermissibleValue:
definition = oi.definition(curie)
# \n can break some downstream tooling like LinkML's gen-pydantic (v1.6.6)
if definition is not None:
definition = definition.replace("\n", " ")
label = oi.label(curie)
pv_formula = enum_definition.pv_formula if enum_definition else None
if str(pv_formula) == "CURIE":
if pv_syntax is not None:
text = pv_syntax.format(id=curie, label=label, definition=definition)
elif str(pv_formula) == "CURIE":
text = curie
elif str(pv_formula) == "LABEL":
# not all ontologies will have text for every element
Expand All @@ -209,6 +229,7 @@ def expand_in_place(
schema_path: Union[str, Path],
value_set_names: List[str] = None,
output_path: Union[str, Path] = None,
pv_syntax: Optional[str] = None,
) -> SchemaDefinition:
"""
Expand value sets in place
Expand All @@ -231,7 +252,7 @@ def expand_in_place(
if value_set_name not in schema.enums:
raise ValueError(f"Unknown value set: {value_set_name}")
value_set = schema.enums[value_set_name]
pvs = list(self.expand_value_set(value_set, schema=schema))
pvs = list(self.expand_value_set(value_set, schema=schema, pv_syntax=pv_syntax))
yaml_obj["enums"][value_set_name]["permissible_values"] = {
str(pv.text): json_dumper.to_dict(pv) for pv in pvs
}
Expand Down Expand Up @@ -259,8 +280,11 @@ def main(verbose: int, quiet: bool):
@click.option("-c", "--config", type=click.Path(exists=True))
@click.option("-s", "--schema", type=click.Path(exists=True))
@click.option("-o", "--output", type=click.Path())
@click.option("--pv-syntax", help="Enter a LinkML structured_pattern.syntax-style string ")
# # add a boolean click option with --mixs-style and (default) --no-mixs-style options
# @click.option("-m", "--mixs-style", is_flag=True, default=False)
@click.argument("value_set_names", nargs=-1)
def expand(config: str, schema: str, value_set_names: List[str], output: str):
def expand(config: str, schema: str, value_set_names: List[str], output: str, pv_syntax: str):
"""
Expand a value set. EXPERIMENTAL.
Expand All @@ -281,17 +305,19 @@ def expand(config: str, schema: str, value_set_names: List[str], output: str):
such as BioPortal or Wikidata. However, note that not all backends are capable of being able to
render all value sets.
Example:
Examples:
-------
vskit expand -c config.yaml -s schema.yaml -o expanded.yaml my_value_set1 my_value_set2
vskit expand -s schema.yaml -o expanded.yaml --pv-syntax '{label} [{id}]' my_value_set1
"""
value_set_names = None if not value_set_names else value_set_names
expander = ValueSetExpander()
if config:
expander.configuration = yaml_loader.load(config, target_class=ValueSetConfiguration)
expander.expand_in_place(
schema_path=schema, value_set_names=value_set_names, output_path=output
schema_path=schema, value_set_names=value_set_names, output_path=output, pv_syntax=pv_syntax
)


Expand Down
96 changes: 96 additions & 0 deletions tests/test_pv_syntax_expander.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import contextlib
import tempfile

from linkml.generators import PythonGenerator
from linkml_runtime import SchemaView
from linkml_runtime.dumpers import yaml_dumper
from linkml_runtime.loaders import yaml_loader
from linkml_runtime.utils.compile_python import compile_python

from src.oaklib.utilities.subsets.value_set_expander import ValueSetExpander

# Minimal LinkML schema used by the test below.
#
# ``TerrestrialBiomeEnum`` is a dynamic enum: it takes everything reachable from
# ENVO:00000428 via rdfs:subClassOf and subtracts everything reachable from
# ENVO:00002030. The nesting matters: YAML is indentation-sensitive, so the
# enum, slot and class definitions must sit under their parent keys.
SCHEMA_STRING = """
name: label_id_schema
id: http://example.com/label_id_schema
imports:
  - linkml:types
prefixes:
  label_id_schema: http://example.com/label_id_schema/
  linkml: https://w3id.org/linkml/
  ENVO: http://purl.obolibrary.org/obo/ENVO_
default_prefix: label_id_schema
enums:
  TerrestrialBiomeEnum:
    minus:
      - reachable_from:
          source_ontology: obo:envo
          source_nodes:
            - ENVO:00002030
          relationship_types:
            - rdfs:subClassOf
          is_direct: false
    reachable_from:
      source_ontology: obo:envo
      source_nodes:
        - ENVO:00000428
      relationship_types:
        - rdfs:subClassOf
      is_direct: false
slots:
  name:
    range: string
  id:
    range: string
    required: true
  biome:
    range: TerrestrialBiomeEnum
classes:
  NamedThing:
    slots:
      - name
      - id
  Sample:
    is_a: NamedThing
    slots:
      - biome
"""

# Instance data that the test below loads as a ``Sample``. The ``biome`` value is
# written in the "{label} [{id}]" form that the test passes as ``pv_syntax``.
DATA_STRING = """
name: Sample-1
id: sample_1
biome: tundra biome [ENVO:01000180]
"""


def test_pv_syntax_expander():
    """
    Check that ``pv_syntax`` controls the text of expanded permissible values.

    Steps:

    1. Dump the schema in SCHEMA_STRING (which contains a dynamic enum) to disk.
    2. Expand it in place with the ``"{label} [{id}]"`` template.
    3. Generate and compile Python classes from the expanded schema.
    4. Load DATA_STRING, whose ``biome`` value uses that same ``label [id]``
       form, as a ``Sample``.

    All scratch files live in a temporary directory that is removed when the
    ``with`` block exits, so the test leaves nothing behind on disk.

    NOTE(review): expanding ``obo:envo`` presumably requires the ENVO ontology
    to be retrievable at test time -- confirm before running offline.
    """
    from pathlib import Path

    view = SchemaView(SCHEMA_STRING)

    with contextlib.ExitStack() as stack:
        # A managed directory replaces NamedTemporaryFile(delete=False), which
        # never removed the files it created.
        scratch_dir = Path(stack.enter_context(tempfile.TemporaryDirectory()))
        dynamic_path = str(scratch_dir / "dynamic_schema.yaml")
        expanded_path = str(scratch_dir / "expanded_schema.yaml")

        yaml_dumper.dump(view.schema, dynamic_path)

        expander = ValueSetExpander()
        expander.expand_in_place(
            schema_path=dynamic_path,
            pv_syntax="{label} [{id}]",
            output_path=expanded_path,
        )

        # Everything that may read the expanded file runs while it still exists.
        expanded_view = SchemaView(expanded_path)
        generator = PythonGenerator(expanded_view.schema)
        python_code = generator.serialize()

    module_name = "test_module"
    module = compile_python(python_code, module_name)

    Sample = module.Sample

    sample_1 = yaml_loader.loads(source=DATA_STRING, target_class=Sample)

    assert sample_1 is not None

0 comments on commit 8d3d2db

Please sign in to comment.