Skip to content

Commit

Permalink
add template for dataset GitHub Actions workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
vemonet committed Jan 18, 2021
1 parent 9b9b15f commit c2a2a26
Show file tree
Hide file tree
Showing 2 changed files with 233 additions and 0 deletions.
139 changes: 139 additions & 0 deletions .github/workflows/r2rml-map-date.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
name: DATE to BioLink RDF
# TODO: Use graphdb as tmp triplestore?
on:
workflow_dispatch:
inputs:
endpoint:
description: 'Upload to SPARQL endpoint'
required: true
default: 'https://graphdb.dumontierlab.com/repositories/ncats-red-kg/statements'
graph:
description: 'In the Graph'
required: true
default: 'https://w3id.org/d2s/graph/date'
jobs:
generate-rdf:
runs-on: ubuntu-latest
# runs-on: [self-hosted, linux, X64, node2]
services:
virtuoso:
image: umids/d2s-virtuoso:latest
ports:
- 8890:8890
env:
DBA_PASSWORD: dba
SPARQL_UPDATE: true
VIRT_Parameters_DirsAllowed: ., /, /usr/local/virtuoso-opensource/share/virtuoso/vad, /usr/local/virtuoso-opensource/var/lib/virtuoso/db
VIRT_SPARQL_ResultSetMaxRows: 999999999999999999
VIRT_SPARQL_MaxQueryCostEstimationTime: 0
VIRT_SPARQL_MaxQueryExecutionTime: 14400
VIRT_VDB_VDBDisconnectTimeout: 7200000
VIRT_Client_SQL_QUERY_TIMEOUT: 14400000
VIRT_CLient_SQL_TXN_TIMEOUT: 14400000
VIRT_Database_ErrorLogLevel: 0 # default: 7 is maximum logs
VIRT_Parameters_Timeout: 7200
VIRT_Parameters_TransactionAfterImageLimit: 5000000000 # default is 50M
VIRT_Parameters_NumberOfBuffers: 493674

outputs:
rdf-output: ${{ steps.stepupload.outputs.rdf_output }}
steps:
- uses: actions/checkout@v2

- name: Download CSV
run: datasets/date/download/download.sh

# - name: Upload CSV input artifact
# id: uploadcsv
# uses: actions/upload-artifact@v1
# with:
# name: preppi-csv
# path: preppi.csv

- name: Run AutoR2RML
uses: vemonet/[email protected]
with:
mapping: datasets/preppi/mapping/map-preppi.rml.ttl
output: rdf-preppi.nt
env:
JAVA_OPTS: "-Xmx5g"
JAVA_TOOL_OPTIONS: "-XX:MaxRAMPercentage=80"

- name: Run R2RML
uses: vemonet/[email protected]
with:
mapping: datasets/preppi/mapping/map-preppi.rml.ttl
output: rdf-preppi.nt
env:
JAVA_OPTS: "-Xmx5g"
JAVA_TOOL_OPTIONS: "-XX:MaxRAMPercentage=80"

- name: Upload AutoR2RML RDF
uses: MaastrichtU-IDS/RdfUpload@master
with:
file: rdf-output/rdf-preppi.nt
endpoint: http://virtuoso:8890/sparql
user: dba
password: dba
graph: https://w3id.org/d2s/graph/autor2rml

- name: Run SPARQL queries to convert DATE
uses: vemonet/sparql-operations-action@v1
with:
file: datasets/date/mapping
endpoint: ${{ github.event.inputs.endpoint }}
user: ${{ secrets.GRAPHDB_USER }}
password: ${{ secrets.GRAPHDB_PASSWORD }}
inputvar: https://sparql.uniprot.org
outputvar: ${{ github.event.inputs.graph }}
servicevar: https://sparql.uniprot.org

- name: Upload RDF output artifact
id: stepupload
uses: actions/upload-artifact@v1
with:
name: rdf-output
path: rdf-preppi.nt

upload-rdf:
runs-on: ubuntu-latest
needs: generate-rdf
steps:
- uses: actions/checkout@v2

- name: Get RDF output artifact
uses: actions/download-artifact@v1
with:
name: rdf-output

- name: Upload RDF
uses: MaastrichtU-IDS/RdfUpload@master
with:
file: rdf-output/rdf-preppi.nt
endpoint: ${{ github.event.inputs.endpoint }}
user: ${{ secrets.GRAPHDB_USER }}
password: ${{ secrets.GRAPHDB_PASSWORD }}
graph: ${{ github.event.inputs.graph }}

- name: Compute and insert HCLS descriptive metadata
uses: vemonet/sparql-operations-action@v1
with:
file: https://github.com/MaastrichtU-IDS/d2s-scripts-repository/tree/master/sparql/compute-hcls-stats
endpoint: ${{ github.event.inputs.endpoint }}
user: ${{ secrets.GRAPHDB_USER }}
password: ${{ secrets.GRAPHDB_PASSWORD }}
inputvar: ${{ github.event.inputs.graph }}
outputvar: https://w3id.org/d2s/metadata
servicevar: ${{ github.event.inputs.endpoint }}

- name: Run RDF to HDT
uses: vemonet/rdfhdt-action@master
with:
input: rdf-output/rdf-preppi.nt
output: hdt-preppi.hdt

- name: Upload HDT output artifact
uses: actions/upload-artifact@v1
with:
name: hdt-output
path: hdt-preppi.hdt
94 changes: 94 additions & 0 deletions support/template/rml-map-dataset.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
name: Convert $dataset_id to RDF
on:
workflow_dispatch:
inputs:
endpoint:
description: 'Upload to SPARQL endpoint'
required: true
default: 'https://graphdb.dumontierlab.com/repositories/test/statements'
graph:
description: 'In the Graph'
required: true
default: 'https://w3id.org/d2s/graph/$dataset_id'

jobs:

generate-rdf:
runs-on: ubuntu-latest
outputs:
rdf-output: ${{ steps.stepupload.outputs.rdf_output }}

steps:
- uses: actions/checkout@v2

- name: Download data
run: datasets/$dataset_id/download/download.sh

- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: 3.7

- name: Install Python dependencies
run: |
python -m pip install -r requirements.txt
- name: Run preprocessing Python script
run: python datasets/$dataset_id/download/preprocessing.py

- name: Run RML mapper to generate RDF
uses: vemonet/[email protected]
with:
mapping: datasets/$dataset_id/mapping/mappings.rml.ttl
output: rdf-output.nt

- name: Upload RDF output artifact to GitHub
id: stepupload
uses: actions/upload-artifact@v1
with:
name: rdf-output
path: rdf-output.nt

upload-rdf:
runs-on: ubuntu-latest
needs: generate-rdf

steps:
- uses: actions/checkout@v2

- name: Get RDF output artifact from previous job
uses: actions/download-artifact@v1
with:
name: rdf-output

- name: Generate HDT compressed file from RDF output
uses: vemonet/rdfhdt-action@master
with:
input: rdf-output/rdf-geonames.nt
output: hdt-geonames.hdt

- name: Upload HDT output artifact to GitHub
uses: actions/upload-artifact@v1
with:
name: hdt-output
path: hdt-geonames.hdt

- name: Upload RDF to the defined SPARQL endpoint
uses: MaastrichtU-IDS/RdfUpload@master
with:
file: rdf-output/*.nt
endpoint: ${{ github.event.inputs.endpoint }}
user: ${{ secrets.GRAPHDB_USER }}
password: ${{ secrets.GRAPHDB_PASSWORD }}
graph: ${{ github.event.inputs.graph }}

- name: Compute and insert HCLS descriptive metadata
uses: vemonet/sparql-operations-action@v1
with:
file: https://github.com/MaastrichtU-IDS/d2s-scripts-repository/tree/master/sparql/compute-hcls-stats
endpoint: ${{ github.event.inputs.endpoint }}
user: ${{ secrets.GRAPHDB_USER }}
password: ${{ secrets.GRAPHDB_PASSWORD }}
inputvar: ${{ github.event.inputs.graph }}
outputvar: https://w3id.org/d2s/metadata
servicevar: ${{ github.event.inputs.endpoint }}

0 comments on commit c2a2a26

Please sign in to comment.