diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e9f65fd..6a9378c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,29 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.3.0] - 2024-07-04 + +### `Changed` + +- Reformatted QCSummary fields and added a QCMessage field containing the old summary message. See [PR 85](https://github.com/phac-nml/mikrokondo/pull/85) + +- Changed default Python3 image to use the StarAMR image. See [PR 90](https://github.com/phac-nml/mikrokondo/pull/90) + +- Stripped the taxonomic rank prefix (e.g. `s__`) from the taxonomic identification string. See [PR 90](https://github.com/phac-nml/mikrokondo/pull/90) + +- Removed retry logic from processes and switched their error strategy to `ignore`. See [PR 91](https://github.com/phac-nml/mikrokondo/pull/91) + +### `Fixed` + +- Updated samtools/minimap2 container fixing CI issues and issues running the pipeline with Docker. See [PR 85](https://github.com/phac-nml/mikrokondo/pull/85) + +- Removed `task.maxRetries` from error handling to prevent a StackOverflow error. See [PR 91](https://github.com/phac-nml/mikrokondo/pull/91) + +### `Added` + +- Changed the name under which the `SpeciesTopHit` field is stored in `irida_next.config`, and added a field displaying the identification method used. See [PR 90](https://github.com/phac-nml/mikrokondo/pull/90) + + ## [0.2.1] - 2024-06-03 ### `Fixed` @@ -87,6 +110,7 @@ Initial release of phac-nml/mikrokondo. Mikrokondo currently supports: read trim - Added integration testing using [nf-test](https://www.nf-test.com/). 
+[0.3.0]: https://github.com/phac-nml/mikrokondo/releases/tag/0.3.0 [0.2.1]: https://github.com/phac-nml/mikrokondo/releases/tag/0.2.1 [0.2.0]: https://github.com/phac-nml/mikrokondo/releases/tag/0.2.0 [0.1.2]: https://github.com/phac-nml/mikrokondo/releases/tag/0.1.2 diff --git a/bin/report_summaries.py b/bin/report_summaries.py index 94f4922c..a250f753 100755 --- a/bin/report_summaries.py +++ b/bin/report_summaries.py @@ -6,7 +6,7 @@ Matthew Wells: 2023-09-22 """ from dataclasses import dataclass -from typing import Dict, Union +from typing import Dict, Optional from collections import defaultdict import os import argparse @@ -22,14 +22,16 @@ class CleaningInfo: trim_field int: when split on a delimiter which section of the list to keep """ field: str - keep: Union[str, None] = None - trim_field: Union[int, None] = None + keep: Optional[str] = None + trim_field: Optional[int] = None class JsonImport: """Intake json report to convert to CSV""" __key_order = {v.field: v for v in [CleaningInfo(field="QCStatus"), CleaningInfo(field="QCSummary"), + CleaningInfo(field="QCParameterSelection"), + CleaningInfo(field="QCMessage"), CleaningInfo(field="QualityAnalysis", keep="message", trim_field=1), CleaningInfo(field="meta")]} __keep_keys = frozenset(__key_order.keys()) diff --git a/conf/irida_next.config b/conf/irida_next.config index 3eb6bd2f..8de000de 100755 --- a/conf/irida_next.config +++ b/conf/irida_next.config @@ -62,7 +62,8 @@ iridanext { "QualityAnalysis.nr_contigs.value" : "nr contigs Value", "QCSummary" : "QC Summary", "meta.downsampled" : "Downsampled", - "SpeciesTopHit" : "Species", + "SpeciesTopHit" : "predicted_identification_name", + "IdentificationMethod" : "predicted_identification_method", "ECTyperSubtyping.0.Database" : "ECTyper Database", "ECTyperSubtyping.0.Evidence" : "ECTyper Evidence", "ECTyperSubtyping.0.GeneCoverages(%)" : "ECTyper GeneCoverages (%)", @@ -128,6 +129,7 @@ iridanext { ] keep = [ "QCStatus", + "QCSummary", 
"QualityAnalysis.checkm_contamination.qc_status", "QualityAnalysis.checkm_contamination.value", "QualityAnalysis.average_coverage.qc_status", @@ -140,7 +142,7 @@ iridanext { "QualityAnalysis.length.value", "QualityAnalysis.nr_contigs.qc_status", "QualityAnalysis.nr_contigs.value", - "QCSummary", + "IdentificationMethod", "meta.downsampled", "SpeciesTopHit", "ECTyperSubtyping.0.Database", diff --git a/conf/modules.config b/conf/modules.config index ba4b9083..35fb9465 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -337,7 +337,8 @@ process { ext.parameters = params.quast stageInMode = params.stage_in_mode // scratch = false - errorStrategy = { task.attempt <= task.maxRetries ? sleep(Math.pow(2, task.attempt) * 200 as long) && 'retry' : 'ignore' } + //errorStrategy = { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' } + errorStrategy = 'ignore' maxForks = 10 // Quast can get overloaded by job subs, so needs to be limited publishDir = [ [ @@ -356,7 +357,8 @@ process { //container = params.checkm.container ext.parameters = params.checkm stageInMode = params.stage_in_mode - errorStrategy = { task.attempt <= task.maxRetries ? sleep(Math.pow(2, task.attempt) * 200 as long) && 'retry' : 'finish' } + //errorStrategy = { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' } + errorStrategy = 'ignore' // scratch = false publishDir = [ [ @@ -373,7 +375,8 @@ process { ext.args = "" //container = params.bandage.container ext.parameters = params.bandage - errorStrategy = { task.attempt <= task.maxRetries ? 
sleep(Math.pow(2, task.attempt) * 200 as long) && 'retry' : 'ignore' } + //errorStrategy = { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' } + errorStrategy = 'ignore' stageInMode = params.stage_in_mode // scratch = false publishDir = [ @@ -523,8 +526,8 @@ process { withName: MASH_SCREEN { def dir_out = null - errorStrategy = { sleep(Math.pow(2, task.attempt) * 200 as long) - return 'retry' } + //errorStrategy = { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' } + errorStrategy = 'ignore' maxForks = 20 maxErrors = 3 ext.args = "-w" @@ -567,7 +570,8 @@ process { } withName: FLYE_ASSEMBLE { - errorStrategy = { task.exitStatus in [140] ? 'retry' : 'ignore'} + //errorStrategy = { task.exitStatus in [140] ? 'retry' : 'ignore'} + errorStrategy = 'ignore' //container = params.flye.container ext.parameters = params.flye ext.args = params.flye.args @@ -623,8 +627,8 @@ process { maxRetries = 3 ext.args = "" ext.parameters = params.spades + //errorStrategy = { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' } errorStrategy = 'ignore' - //errorStrategy = { task.attempt <= task.maxRetries && sleep(Math.pow(2, task.attempt) * 200 as long) ? 'retry' : 'ignore' } // scratch = false publishDir = [ [ @@ -787,7 +791,8 @@ process { // scratch = false ext.parameters = params.pilon_iterative maxRetries = 3 - errorStrategy = { task.attempt <= task.maxRetries ? 
sleep(Math.pow(2, task.attempt) * 200 as long) && 'retry' : 'ignore' } + //errorStrategy = { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' } + errorStrategy = 'ignore' publishDir = [ [ path: { ["${task.assembly_polishing_directory_name}", "Pilon", "Fasta"].join(File.separator) }, diff --git a/modules/local/report.nf b/modules/local/report.nf index adf1fe77..36533527 100644 --- a/modules/local/report.nf +++ b/modules/local/report.nf @@ -35,6 +35,7 @@ process REPORT{ def data_stride = 3 // report values added in groups of three, e.g sample meta info, parameters, output file of interest def headers_list = 'headers' // ! TODO this string exists twice, need to fix that def arr_size = test_in.size() + def qc_species_tag = "QCParameterSelection" for(long i = 0; i < arr_size; i=i+data_stride){ def meta_data = test_in[i] def report_tag = test_in[i+1] @@ -76,8 +77,8 @@ process REPORT{ def search_phrases = qc_params_species() // Add in quality information in place - generate_qc_data(sample_data, search_phrases) - create_action_call(sample_data) + generate_qc_data(sample_data, search_phrases, qc_species_tag) + create_action_call(sample_data, qc_species_tag) def json_converted_data = new JsonBuilder(sample_data).toPrettyString() @@ -124,6 +125,7 @@ def generate_coverage_data(sample_data, bp_field, species){ // Add fixed genome coverage for species if desired def species_data_pos = 1; if(base_counts_p + && species[species_data_pos] != null && species[species_data_pos].containsKey("fixed_genome_size") && species[species_data_pos].fixed_genome_size != null){ @@ -197,7 +199,7 @@ def populate_qual_message(qual_data){ } // Action: Reisolate and resequence, resequence, all good. -def create_action_call(sample_data){ +def create_action_call(sample_data, species_tag){ /*Define criteria used to create base sketches TODO Need to test a falthrough sample (e.g. 
unspeciated to see what happens) @@ -224,6 +226,9 @@ def create_action_call(sample_data){ TODO creating a logic heavy function that needs to be refactored + + For addressing the defect, the Passed and failed messeges have been broken up, all that remains is to have the + final summary, checks passed and checks failed */ for(val in sample_data){ @@ -245,8 +250,7 @@ def create_action_call(sample_data){ final_message = "[FAILED] Sample was determined to be metagenomic, and this was not specied as" + " a metagenomic run indicating contamination REISOLATION AND RESEQUENCING RECOMMENDED." + "There is additionally a possibility that your sample could not be identified as it is novel and " + - "not included in the mash sketch provided to the pipeline (however this would be very rare), "+ - "but if this is the case please disregard this message." + "not included in the program used to taxonomically classify your pipeline (however this is an unlikely culprit)." } sample_data[val.key]["QCStatus"] = sample_status sample_data[val.key]["QCSummary"] = final_message @@ -279,7 +283,6 @@ def create_action_call(sample_data){ if(!meta_data.assembly){ // We should have reads as we assembled it if(qual_data && qual_data.containsKey("raw_average_quality") && !qual_data.raw_average_quality.status){ - //qual_message.add(params.QCReportFields.raw_average_quality.low_msg) resequence += 1 checks_failed += 1 }else if (qual_data && (!qual_data.containsKey("raw_average_quality") || !qual_data.raw_average_quality.status)){ @@ -290,7 +293,7 @@ def create_action_call(sample_data){ checks += 1 if(qual_data && qual_data.containsKey("average_coverage") && !qual_data.average_coverage.status){ - //qual_message.add(params.QCReportFields.average_coverage.low_msg) + if(meta_data.downsampled){ qual_message.add("The sample may have been downsampled too aggressively, if this is the cause please re-run sample with a different target depth.") } @@ -341,7 +344,6 @@ def create_action_call(sample_data){ checks += 
1 - (reisolate, resequence) = n50_nrcontigs_decision(qual_data, nr_contigs_failed, n50_failed, qual_message, reisolate, resequence) //qual_message.add("Quality Conclusion") @@ -364,17 +366,26 @@ def create_action_call(sample_data){ qual_message.add("[PASSED] All Checks passed") sample_status = "PASSED" } - qual_message.add("Passed Tests: ${checks - checks_failed - checks_ignored}/${checks}") + def organism_criteria = sample_data[val.key][species_tag] + def tests_passed = "Passed Tests: ${checks - checks_failed - checks_ignored}/${checks}" + qual_message.add(tests_passed) - qual_message.add("Species ID: ${val.value[val.key][params.top_hit_species.report_tag]}") + def species_id = "Species ID: ${val.value[val.key][params.top_hit_species.report_tag]}" + qual_message.add(species_id) // Qual summary not final message final_message = qual_message.join("\n") def terminal_message = populate_qual_message(qual_data).join("\n") log.info "\n$val.key\n${terminal_message}\n${sample_status}\n${final_message}" + + // Reseq recommended should go to a seperate field + // Requested output should be: [PASS|FAILED] Species ID: [species] [Tests passed] [Organism criteria available] + qc_message = "${sample_status} ${species_id}; ${tests_passed}; Organism QC Criteria: ${organism_criteria}" + + sample_data[val.key]["QCSummary"] = qc_message sample_data[val.key]["QCStatus"] = sample_status - sample_data[val.key]["QCSummary"] = final_message + sample_data[val.key]["QCMessage"] = final_message } } @@ -604,7 +615,8 @@ def get_species(value, search_phrases, shortest_token){ shortest_token: contains values to scrub from value to be searched for */ - def qc_data = null; + + def qc_data = [params.QCReport.fallthrough.search, params.QCReport.fallthrough]; if(value == null){ return qc_data } @@ -626,7 +638,6 @@ def get_species(value, search_phrases, shortest_token){ def get_qc_data_species(value_data, qc_data){ def quality_messages = [:] - params.QCReportFields.each{ k, v -> if(v.on){ // only use 
the fields specified in the config @@ -649,7 +660,7 @@ def get_qc_data_species(value_data, qc_data){ return quality_messages; } -def generate_qc_data(data, search_phrases){ +def generate_qc_data(data, search_phrases, qc_species_tag){ /* data: sample data in a LazyMap search_phrases: normalized search phrases from the nextflow.config @@ -659,12 +670,13 @@ def generate_qc_data(data, search_phrases){ def top_hit_tag = params.top_hit_species.report_tag; def quality_analysis = "QualityAnalysis" def shortest_token = get_shortest_token(search_phrases) + def species_tag_location = 0 for(k in data){ if(!k.value.meta.metagenomic){ def species = get_species(k.value[k.key][top_hit_tag], search_phrases, shortest_token) - //generate_coverage_data(data[k.key], params.seqtk_size.report_tag, species) // update coverage first so its values can be used in generating qc messages generate_coverage_data(data[k.key], params.coverage_calc_fields.bp_field, species) // update coverage first so its values can be used in generating qc messages data[k.key][quality_analysis] = get_qc_data_species(k.value[k.key], species) + data[k.key][qc_species_tag] = species[species_tag_location] }else{ data[k.key][quality_analysis] = ["Metagenomic": ["message": null, "status": false]] data[k.key][quality_analysis]["Metagenomic"].message = "The sample was determined to be metagenomic, summary metrics will not be generated" + @@ -865,7 +877,7 @@ def table_values(file_path, header_p, seperator, headers=null){ } } - return rows_list.indexed().collectEntries { idx, row -> + return rows_list.indexed().collectEntries { idx, row -> [(idx): row.collectEntries { k, v -> [(k): replace_missing(v)] }] } } diff --git a/nextflow.config b/nextflow.config index 2dd1498c..f6675468 100644 --- a/nextflow.config +++ b/nextflow.config @@ -43,7 +43,7 @@ params { show_hidden_params = false validationS3PathCheck = true validationShowHiddenParams = false - validationSchemaIgnoreParams = 
'abricate,locidex,assembly_status,bakta,bandage,checkm,chopper,contigs_too_short,coreutils,coverage_calc_fields,ectyper,fastp,fastqc,filtered_reads,flye,kat,kleborate,kraken,kraken_bin,kraken_species,lissero,mash,mash_meta,medaka,minimap2,mlst,mobsuite_recon,opt_platforms,pilon,pilon_iterative,pointfinder_db_tag,python3,QCReport,QCReport-fields,QCReportFields,quast,racon,raw_reads,report_aggregate,r_contaminants,samtools,seqkit,seqtk,seqtk_size,shigeifinder,sistr,spades,spatyper,staramr,subtyping_report,top_hit_species,unicycler' + validationSchemaIgnoreParams = 'top_hit_method,abricate,locidex,assembly_status,bakta,bandage,checkm,chopper,contigs_too_short,coreutils,coverage_calc_fields,ectyper,fastp,fastqc,filtered_reads,flye,kat,kleborate,kraken,kraken_bin,kraken_species,lissero,mash,mash_meta,medaka,minimap2,mlst,mobsuite_recon,opt_platforms,pilon,pilon_iterative,pointfinder_db_tag,python3,QCReport,QCReport-fields,QCReportFields,quast,racon,raw_reads,report_aggregate,r_contaminants,samtools,seqkit,seqtk,seqtk_size,shigeifinder,sistr,spades,spatyper,staramr,subtyping_report,top_hit_species,unicycler' validationFailUnrecognisedParams = false // for the qcreport fields // SKIP options @@ -193,8 +193,8 @@ params { // Python container, May switch for pypy3 python3 { - singularity = "docker.io/python:3.11.6" - docker = "docker.io/python:3.11.6" + singularity = "quay.io/biocontainers/staramr:0.10.0--pyhdfd78af_0" + docker = "quay.io/biocontainers/staramr:0.10.0--pyhdfd78af_0" } seqtk { @@ -424,14 +424,14 @@ params { report_tag = "SpeciesTopHit" } - kraken_species { - report_tag = "Kraken2TopHit" + top_hit_method { + report_tag = "IdentificationMethod" } r_contaminants { // container contains minimap2 and samtools - singularity = "https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0" - docker = 
"quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:8f2087d838e5270cd83b5a016667234429f16eea-0" + singularity = "https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0" + docker = "quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0" phix_fa = "" homo_sapiens_fa = "" pacbio_mg = "" @@ -1083,7 +1083,7 @@ manifest { description = """Mikrokondo""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '0.2.1' + version = '0.3.0' defaultBranch = 'main' doi = '' } diff --git a/subworkflows/local/determine_species.nf b/subworkflows/local/determine_species.nf index a99f6cc7..c7671d64 100644 --- a/subworkflows/local/determine_species.nf +++ b/subworkflows/local/determine_species.nf @@ -15,13 +15,18 @@ workflow DETERMINE_SPECIES { // TODO can try using ifEmpty operator or exit codes in the modules themselves main: + def id_method = null reports = Channel.empty() results = Channel.empty() versions = Channel.empty() + taxon_identifications = Channel.empty() + + def TAXON_PREFIX_STRIP = ~/^\w__/ + if (params.run_kraken){ log.info "Running kraken2 for contigs classification" KRAKEN(contigs, params.kraken.db ? 
file(params.kraken.db) : error("--kraken2_db ${params.kraken.db} is invalid")) - + id_method = "Kraken2" // join contigs for classification split_contigs = KRAKEN.out.classified_contigs.join(KRAKEN.out.report).join(KRAKEN.out.kraken_output) results = results.mix(KRAKEN.out.report) @@ -30,28 +35,35 @@ workflow DETERMINE_SPECIES { }) parsed = PARSE_KRAKEN(KRAKEN.out.report) - reports = reports.mix(parsed.kraken_top.map{ - meta, report -> tuple(meta, params.top_hit_species, report) - }) - - top_hit = parsed.kraken_top + taxon_identifications = parsed.kraken_top versions = versions.mix(parsed.versions) versions = versions.mix(KRAKEN.out.versions) }else { log.info "Using mash screen for sample classification" + id_method = "Mash" MASH_SCREEN(contigs, params.mash.mash_sketch ? file(params.mash.mash_sketch) : error("--mash_sketch ${params.mash_sketch} is invalid")) results = results.mix(MASH_SCREEN.out.mash_data) parsed = PARSE_MASH(MASH_SCREEN.out.mash_data, Channel.value("top")) - reports = reports.mix(parsed.mash_out.map{ - meta, report -> tuple(meta, params.top_hit_species, report) - }) - top_hit = parsed.mash_out + taxon_identifications = parsed.mash_out versions = versions.mix(MASH_SCREEN.out.versions) versions = versions.mix(parsed.versions) } + top_hit = taxon_identifications.map{meta, output -> tuple(meta, output - TAXON_PREFIX_STRIP)} + reports = reports.mix( top_hit.map{ + meta, report -> tuple(meta, params.top_hit_species, report) + } + ) + + + // Create a channel identifying pipelines output ID + id_channel = top_hit.map{ + meta, output -> tuple(meta, params.top_hit_method, id_method) + } + reports = reports.mix(id_channel) + emit: diff --git a/tests/main.nf.test b/tests/main.nf.test index 323d99bf..ac8d929f 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -88,10 +88,11 @@ nextflow_pipeline { assert !iridanext_metadata.CSE.containsKey("Length Value") assert !iridanext_metadata.CSE.containsKey("nr contigs Status") assert 
!iridanext_metadata.CSE.containsKey("nr contigs Value") - assert iridanext_metadata.CSE."QC Summary" == "[FAILED] Sample is likely contaminated, REISOLATION AND RESEQUENCING RECOMMENDED\nPassed Tests: 0/6\nSpecies ID: null" + assert iridanext_metadata.CSE."QC Summary" == "FAILED Species ID: null; Passed Tests: 0/6; Organism QC Criteria: No organism specific QC data available." assert iridanext_metadata.CSE."Downsampled" == false - assert !iridanext_metadata.CSE.containsKey("Species") + assert !iridanext_metadata.CSE.containsKey("predicted_identification_name") + assert !iridanext_metadata.CSE.containsKey("predicted_identification_method") assert !iridanext_metadata.CSE.containsKey("GC (%)") //assert iridanext_metadata.CSE."Mean Sequence Length Forward" == 150 // Base count after decontamination @@ -133,6 +134,7 @@ nextflow_pipeline { } then { + assert workflow.success assert path("$launchDir/results").exists() @@ -216,10 +218,11 @@ nextflow_pipeline { assert iridanext_metadata.short."Length Value" == 4949 assert iridanext_metadata.short."nr contigs Status" == "WARNING" assert iridanext_metadata.short."nr contigs Value" == 1 - assert iridanext_metadata.short."QC Summary" == "[FAILED] RESEQUENCING IS RECOMMENDED\nPassed Tests: 5/6\nSpecies ID: No Species Identified" + assert iridanext_metadata.short."QC Summary" == "FAILED Species ID: No Species Identified; Passed Tests: 5/6; Organism QC Criteria: No organism specific QC data available." 
assert iridanext_metadata.short."Downsampled" == false - assert iridanext_metadata.short."Species" == "No Species Identified" + assert iridanext_metadata.short."predicted_identification_name" == "No Species Identified" + assert iridanext_metadata.short."predicted_identification_method" == "Mash" assert iridanext_metadata.short."GC (%)" == "52.96" assert iridanext_metadata.short."Mean Sequence Length Forward" == 250 assert iridanext_metadata.short."BaseCount" == 237500 diff --git a/tests/pipelines/main.from_assemblies.nf.test b/tests/pipelines/main.from_assemblies.nf.test index 0ca478c8..57d0b4c7 100644 --- a/tests/pipelines/main.from_assemblies.nf.test +++ b/tests/pipelines/main.from_assemblies.nf.test @@ -121,10 +121,11 @@ nextflow_pipeline { assert ecoli_metadata."Length Value" == 5333525 assert ecoli_metadata."nr contigs Status" == "PASSED" assert ecoli_metadata."nr contigs Value" == 187 - assert ecoli_metadata."QC Summary" == "[FAILED] Sample is likely contaminated, REISOLATION AND RESEQUENCING RECOMMENDED\nPassed Tests: 3/4\nSpecies ID: s__Escherichia coli" + assert ecoli_metadata."QC Summary" == "FAILED Species ID: Escherichia coli; Passed Tests: 3/4; Organism QC Criteria: Escherichia coli" assert ecoli_metadata."Downsampled" == false - assert ecoli_metadata."Species" == "s__Escherichia coli" + assert ecoli_metadata."predicted_identification_name" == "Escherichia coli" + assert ecoli_metadata."predicted_identification_method" == "Mash" assert ecoli_metadata."ECTyper Database" == "v1.0 (11-03-2020)" assert ecoli_metadata."ECTyper Evidence" == "Based on 3 allele(s)" @@ -304,7 +305,7 @@ nextflow_pipeline { assert ecoli_metadata."Length Value" == 5299656 assert ecoli_metadata."nr contigs Status" == "PASSED" assert ecoli_metadata."nr contigs Value" == 123 - assert ecoli_metadata."QC Summary" == "[FAILED] Sample is likely contaminated, REISOLATION AND RESEQUENCING RECOMMENDED\nPassed Tests: 3/4\nSpecies ID: s__Escherichia coli" + assert ecoli_metadata."QC Summary" 
== "FAILED Species ID: Escherichia coli; Passed Tests: 3/4; Organism QC Criteria: Escherichia coli" // Read in filtered assembly fasta to verify number of contigs def assemblyLines = path("$launchDir/results/Assembly/FinalAssembly/ecoli_GCA_000947975/ecoli_GCA_000947975.final.filtered.assembly.fasta.gz").readLinesGzip() @@ -393,7 +394,7 @@ nextflow_pipeline { assert salmonella_metadata."Length Value" == 4944000 assert salmonella_metadata."nr contigs Status" == "PASSED" assert salmonella_metadata."nr contigs Value" == 3 - assert salmonella_metadata."QC Summary" == "[FAILED] Sample is likely contaminated, REISOLATION AND RESEQUENCING RECOMMENDED\nPassed Tests: 3/4\nSpecies ID: s__Salmonella enterica" + assert salmonella_metadata."QC Summary" == "FAILED Species ID: Salmonella enterica; Passed Tests: 3/4; Organism QC Criteria: Salmonella" assert salmonella_metadata."Downsampled" == false @@ -430,9 +431,9 @@ nextflow_pipeline { assert final_report.salmonella_GCA_000008105.QualityAnalysis.length.qc_status == "PASSED" // Tests - assert salmonella_json.SpeciesTopHit == "s__Salmonella enterica" - assert iridanext_metadata.salmonella_GCA_000008105."Species" == "s__Salmonella enterica" - assert final_report_tmap.SpeciesTopHit == "s__Salmonella enterica" + assert salmonella_json.SpeciesTopHit == "Salmonella enterica" + assert iridanext_metadata.salmonella_GCA_000008105."predicted_identification_name" == "Salmonella enterica" + assert iridanext_metadata.salmonella_GCA_000008105."predicted_identification_method" == "Mash" assert salmonella_json.QUAST."0"."Total length" == "4944000" assert final_report_tmap."QUAST.0.Total length" == "4944000" @@ -559,7 +560,7 @@ nextflow_pipeline { assert listeria_metadata."Length Value" == 2944528 assert listeria_metadata."nr contigs Status" == "PASSED" assert listeria_metadata."nr contigs Value" == 1 - assert listeria_metadata."QC Summary" == "[FAILED] Sample is likely contaminated, REISOLATION AND RESEQUENCING RECOMMENDED\nPassed Tests: 
3/4\nSpecies ID: s__Listeria monocytogenes" + assert listeria_metadata."QC Summary" == "FAILED Species ID: Listeria monocytogenes; Passed Tests: 3/4; Organism QC Criteria: Listeria" assert listeria_metadata."Downsampled" == false @@ -592,9 +593,10 @@ nextflow_pipeline { assert final_report.listeria_GCF_000196035.QualityAnalysis.length.qc_status == "PASSED" // Tests - assert listeria_json.SpeciesTopHit == "s__Listeria monocytogenes" - assert iridanext_metadata.listeria_GCF_000196035."Species" == "s__Listeria monocytogenes" - assert final_report_tmap.SpeciesTopHit == "s__Listeria monocytogenes" + assert listeria_json.SpeciesTopHit == "Listeria monocytogenes" + assert iridanext_metadata.listeria_GCF_000196035."predicted_identification_name" == "Listeria monocytogenes" + assert iridanext_metadata.listeria_GCF_000196035."predicted_identification_method" == "Mash" + assert final_report_tmap.SpeciesTopHit == "Listeria monocytogenes" assert listeria_json.QUAST."0"."Total length" == "2944528" assert final_report_tmap."QUAST.0.Total length" == "2944528" diff --git a/tests/subworkflows/local/qc_assemblies/qc_assemblies.nf.test.snap b/tests/subworkflows/local/qc_assemblies/qc_assemblies.nf.test.snap index 878ef970..ba4f6864 100644 --- a/tests/subworkflows/local/qc_assemblies/qc_assemblies.nf.test.snap +++ b/tests/subworkflows/local/qc_assemblies/qc_assemblies.nf.test.snap @@ -180,8 +180,8 @@ ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.04.1" + "nextflow": "23.04.0" }, - "timestamp": "2024-04-18T13:53:27.760811189" + "timestamp": "2024-06-20T12:08:01.726320878" } } \ No newline at end of file