Skip to content

Commit

Permalink
First pass at PyOBO ingest.
Browse files Browse the repository at this point in the history
See #45
  • Loading branch information
cmungall committed Nov 23, 2022
1 parent 0178c2d commit 019f211
Show file tree
Hide file tree
Showing 6 changed files with 108 additions and 8 deletions.
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ BUILDER_DIR = src/semsql/builder
# Directory and file locations used by the rules below.
# DDL_DIR and PREFIX_DIR are derived from BUILDER_DIR; the others are given
# as literal paths.
DDL_DIR = $(BUILDER_DIR)/sql_schema
YAML_DIR = src/semsql/linkml
SQLA_DIR = src/semsql/sqla
# Ontology registry file; it is the prerequisite of the ontologies.Makefile rule.
ONT_REGISTRY = src/semsql/builder/registry/ontologies.yaml

PREFIX_DIR = $(BUILDER_DIR)/prefixes

Expand Down Expand Up @@ -141,7 +142,7 @@ download/reactome-biopax.zip:
# Regenerate the registry schema Python module from its YAML schema.
# $< is the YAML prerequisite and $@ is the generated .py target.
# Output is staged in $@.tmp and only moved into place when gen-python
# succeeds, so a failed run cannot leave a truncated target that looks
# up to date (same pattern as the ontologies.Makefile rule).
src/semsql/builder/registry/registry_schema.py: src/semsql/builder/registry/registry_schema.yaml
	$(RUN) gen-python $< > $@.tmp && mv $@.tmp $@

# Generate the per-ontology download/build rules from the ontology registry.
# The registry path comes from ONT_REGISTRY (a single rule header; the
# hard-coded duplicate header naming the same file has been dropped).
# $< is the registry file; -P passes the local prefixes CSV to the generator.
# Output is written to $@.tmp first and renamed only on success so that the
# file pulled in by `include` is never left half-written.
ontologies.Makefile: $(ONT_REGISTRY)
	$(RUN) semsql generate-makefile -P src/semsql/builder/prefixes/prefixes_local.csv $< > $@.tmp && mv $@.tmp $@

include ontologies.Makefile
Expand Down
63 changes: 59 additions & 4 deletions ontologies.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ db/mlo.owl: download/mlo.owl


# Download the zipped ITO release, stream the ITO.owl member out of the
# archive into $@.tmp, and delete the staged archive.
# The archive member is named explicitly: a literal, unexpanded
# "{ont.zip_extract_file}" placeholder matches nothing inside the zip and
# would leave $@.tmp empty.
# A SHA-256 of the staged file is written to $@.sha256 before the file is
# moved into place.
download/ito.owl: STAMP
	curl -L -s https://github.com/OpenBioLink/ITO/raw/master/ITO.owl.zip > $@.zip.tmp && unzip -p $@.zip.tmp ITO.owl > $@.tmp && rm $@.zip.tmp
	sha256sum -b $@.tmp > $@.sha256
	mv $@.tmp $@

Expand All @@ -219,7 +219,7 @@ db/ito.owl: download/ito.owl


# Download the Reactome BioPAX zip, stream the Homo_sapiens.owl member out of
# the archive into $@.tmp, and delete the staged archive.
# The archive member is named explicitly: a literal, unexpanded
# "{ont.zip_extract_file}" placeholder matches nothing inside the zip and
# would leave $@.tmp empty.
# A SHA-256 of the staged file is written to $@.sha256 before the file is
# moved into place.
download/reactome-Homo-sapiens.owl: STAMP
	curl -L -s https://reactome.org/download/current/biopax.zip > $@.zip.tmp && unzip -p $@.zip.tmp Homo_sapiens.owl > $@.tmp && rm $@.zip.tmp
	sha256sum -b $@.tmp > $@.sha256
	mv $@.tmp $@

Expand Down Expand Up @@ -263,7 +263,7 @@ db/sweetAll.owl: download/sweetAll.owl


# Download the LOV dump, which is served gzip-compressed (.n3.gz), and
# decompress it on the fly into $@.tmp.
# Only the decompressing download is kept: a preceding plain download would
# fetch the same URL twice and write still-compressed bytes that are
# immediately overwritten.
# A SHA-256 of the staged file is written to $@.sha256 before the file is
# moved into place.
download/lov.owl: STAMP
	curl -L -s https://lov.linkeddata.es/lov.n3.gz | gzip -dc > $@.tmp
	sha256sum -b $@.tmp > $@.sha256
	mv $@.tmp $@

Expand Down Expand Up @@ -306,6 +306,61 @@ db/co_324.owl: download/co_324.owl
cp $< $@


# hgnc.genegroup: fetch the gzip-compressed OWL export and decompress it on
# the fly into $@.tmp, write a SHA-256 of the staged file to $@.sha256, then
# move the staged file into place.
download/hgnc.genegroup.owl: STAMP
curl -L -s https://github.com/biopragmatics/obo-db-ingest/raw/main/export/hgnc.genegroup/hgnc.genegroup.owl.gz | gzip -dc > $@.tmp
sha256sum -b $@.tmp > $@.sha256
mv $@.tmp $@

# Mark the download as precious so make does not delete it automatically.
.PRECIOUS: download/hgnc.genegroup.owl

# The db/ copy is a plain cp of the download (the generic db/%.owl rule runs
# `robot merge` instead).
db/hgnc.genegroup.owl: download/hgnc.genegroup.owl
cp $< $@


# hgnc: fetch the gzip-compressed OWL export and decompress it on the fly into
# $@.tmp, write a SHA-256 of the staged file to $@.sha256, then move the
# staged file into place.
# NOTE(review): the URL contains a fixed "2022-06-01" path segment, which
# looks like a pinned export version - confirm it is meant to stay pinned.
download/hgnc.owl: STAMP
curl -L -s https://github.com/biopragmatics/obo-db-ingest/raw/main/export/hgnc/2022-06-01/hgnc.owl.gz | gzip -dc > $@.tmp
sha256sum -b $@.tmp > $@.sha256
mv $@.tmp $@

# Mark the download as precious so make does not delete it automatically.
.PRECIOUS: download/hgnc.owl

# The db/ copy is a plain cp of the download (the generic db/%.owl rule runs
# `robot merge` instead).
db/hgnc.owl: download/hgnc.owl
cp $< $@


# dictybase: fetch the gzip-compressed OWL export and decompress it on the fly
# into $@.tmp, write a SHA-256 of the staged file to $@.sha256, then move the
# staged file into place.
download/dictybase.owl: STAMP
curl -L -s https://github.com/biopragmatics/obo-db-ingest/raw/main/export/dictybase/dictybase.owl.gz | gzip -dc > $@.tmp
sha256sum -b $@.tmp > $@.sha256
mv $@.tmp $@

# Mark the download as precious so make does not delete it automatically.
.PRECIOUS: download/dictybase.owl

# The db/ copy is a plain cp of the download (the generic db/%.owl rule runs
# `robot merge` instead).
db/dictybase.owl: download/dictybase.owl
cp $< $@


# eccode: fetch the gzip-compressed OWL export and decompress it on the fly
# into $@.tmp, write a SHA-256 of the staged file to $@.sha256, then move the
# staged file into place.
# NOTE(review): the URL contains a fixed "25-May-2022" path segment, which
# looks like a pinned export version - confirm it is meant to stay pinned.
download/eccode.owl: STAMP
curl -L -s https://github.com/biopragmatics/obo-db-ingest/raw/main/export/eccode/25-May-2022/eccode.owl.gz | gzip -dc > $@.tmp
sha256sum -b $@.tmp > $@.sha256
mv $@.tmp $@

# Mark the download as precious so make does not delete it automatically.
.PRECIOUS: download/eccode.owl

# The db/ copy is a plain cp of the download (the generic db/%.owl rule runs
# `robot merge` instead).
db/eccode.owl: download/eccode.owl
cp $< $@


# uniprot: fetch the gzip-compressed OWL export and decompress it on the fly
# into $@.tmp, write a SHA-256 of the staged file to $@.sha256, then move the
# staged file into place.
# NOTE(review): the URL contains a fixed "2022_02" path segment, which looks
# like a pinned export version - confirm it is meant to stay pinned.
download/uniprot.owl: STAMP
curl -L -s https://github.com/biopragmatics/obo-db-ingest/raw/main/export/uniprot/2022_02/uniprot.owl.gz | gzip -dc > $@.tmp
sha256sum -b $@.tmp > $@.sha256
mv $@.tmp $@

# Mark the download as precious so make does not delete it automatically.
.PRECIOUS: download/uniprot.owl

# The db/ copy is a plain cp of the download (the generic db/%.owl rule runs
# `robot merge` instead).
db/uniprot.owl: download/uniprot.owl
cp $< $@


download/%.owl: STAMP
curl -L -s http://purl.obolibrary.org/obo/$*.owl > $@.tmp
sha256sum -b $@.tmp > $@.sha256
Expand All @@ -316,4 +371,4 @@ download/%.owl: STAMP
# Generic fallback for any db/<name>.owl without an explicit rule: run
# `robot merge` on the matching download ($<) and write the result to $@.
db/%.owl: download/%.owl
robot merge -i $< -o $@

# Names of the ontologies that have explicit rules in this file, including the
# PyOBO-derived ones (hgnc.genegroup, hgnc, dictybase, eccode, uniprot).
# Defined once: an earlier assignment without the PyOBO names would be dead,
# since this one overrides it.
EXTRA_ONTOLOGIES = chiro ncit foodon chebiplus msio phenio comploinc bero aio reacto go go-lego bao orcid cpont biolink biopax enanomapper mlo ito reactome-Homo-sapiens efo edam sweetAll lov schema-dot-org cosmo co_324 hgnc.genegroup hgnc dictybase eccode uniprot
7 changes: 6 additions & 1 deletion src/semsql/builder/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from semsql.builder.registry import registry_schema
from semsql.builder.registry.registry_schema import (Makefile, MakefileRule,
Ontology)
Ontology, CompressionEnum)
from semsql.utils.makefile_utils import makefile_to_string

this_path = Path(__file__).parent
Expand Down Expand Up @@ -137,6 +137,11 @@ def compile_registry(registry_path: str, local_prefix_file: TextIO = None) -> st
"unzip -p $@.zip.tmp {ont.zip_extract_file} "
"> $@.tmp && rm $@.zip.tmp"
)
elif ont.compression:
if str(ont.compression) == str(CompressionEnum.gzip.text):
command = f"curl -L -s {ont.url} | gzip -dc > $@.tmp"
else:
raise ValueError(f"Unknown compression: '{ont.compression}'")
else:
command = f"curl -L -s {ont.url} > $@.tmp"
download_rule = MakefileRule(
Expand Down
6 changes: 6 additions & 0 deletions src/semsql/builder/prefixes/prefixes.csv
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@ evs.ncit,http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#
old.fix,http://purl.org/obo/owl/FIX#
mlo,http://www.a2rd.net.br/mlo#
co_324,https://cropontology.org/rdf/CO_324:
hgnc.genegroup,http://purl.obolibrary.org/obo/hgnc.genegroup_
hgnc,http://purl.obolibrary.org/obo/hgnc_
dictybase,http://purl.obolibrary.org/obo/dictybase_
EC,http://purl.obolibrary.org/obo/eccode_
uniprot.obo,http://purl.obolibrary.org/obo/uniprot_
uniprot.obo,http://purl.obolibrary.org/obo/uniprot_
RBO,http://purl.obolibrary.org/obo/RBO_
CLYH,http://purl.obolibrary.org/obo/CLYH_
RO,http://purl.obolibrary.org/obo/RO_
Expand Down
6 changes: 6 additions & 0 deletions src/semsql/builder/prefixes/prefixes_local.csv
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,9 @@ evs.ncit,http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#
old.fix,http://purl.org/obo/owl/FIX#
mlo,http://www.a2rd.net.br/mlo#
co_324,https://cropontology.org/rdf/CO_324:
hgnc.genegroup,http://purl.obolibrary.org/obo/hgnc.genegroup_
hgnc,http://purl.obolibrary.org/obo/hgnc_
dictybase,http://purl.obolibrary.org/obo/dictybase_
EC,http://purl.obolibrary.org/obo/eccode_
uniprot.obo,http://purl.obolibrary.org/obo/uniprot_
uniprot.obo,http://purl.obolibrary.org/obo/uniprot_
31 changes: 29 additions & 2 deletions src/semsql/builder/registry/ontologies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -110,5 +110,32 @@ ontologies:
url: https://cropontology.org/ontology/CO_324/rdf
prefixmap:
co_324: "https://cropontology.org/rdf/CO_324:"



## PyOBO products
## See https://github.com/INCATools/semantic-sql/issues/45
hgnc.genegroup:
url: https://github.com/biopragmatics/obo-db-ingest/raw/main/export/hgnc.genegroup/hgnc.genegroup.owl.gz
compression: gzip
prefixmap:
hgnc.genegroup: http://purl.obolibrary.org/obo/hgnc.genegroup_
hgnc:
url: https://github.com/biopragmatics/obo-db-ingest/raw/main/export/hgnc/2022-06-01/hgnc.owl.gz
compression: gzip
prefixmap:
hgnc: http://purl.obolibrary.org/obo/hgnc_
dictybase:
url: https://github.com/biopragmatics/obo-db-ingest/raw/main/export/dictybase/dictybase.owl.gz
compression: gzip
prefixmap:
dictybase: http://purl.obolibrary.org/obo/dictybase_
eccode:
url: https://github.com/biopragmatics/obo-db-ingest/raw/main/export/eccode/25-May-2022/eccode.owl.gz
compression: gzip
prefixmap:
EC: http://purl.obolibrary.org/obo/eccode_
uniprot.obo: http://purl.obolibrary.org/obo/uniprot_
uniprot:
url: https://github.com/biopragmatics/obo-db-ingest/raw/main/export/uniprot/2022_02/uniprot.owl.gz
compression: gzip
prefixmap:
uniprot.obo: http://purl.obolibrary.org/obo/uniprot_

0 comments on commit 019f211

Please sign in to comment.