diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/converters/TranscriptEffectConverter.scala b/adam-core/src/main/scala/org/bdgenomics/adam/converters/TranscriptEffectConverter.scala
index 44ffa840ec..14ec72aa88 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/converters/TranscriptEffectConverter.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/converters/TranscriptEffectConverter.scala
@@ -83,6 +83,24 @@ private[adam] object TranscriptEffectConverter extends Serializable with Logging
     s.split("&").map(MESSAGES.get(_)).toList.flatten
   }
 
+  /**
+   * Ensembl VEP incorrectly supplies an interval instead of a position
+   * for some attributes; in these cases use the interval start as the position.
+   *
+   * A leading minus sign is read as the sign of the position, not as the
+   * interval separator, so a value such as "-5" parses to -5.
+   *
+   * @param s position or interval
+   * @return position or interval start
+   * @throws NumberFormatException if the position or interval start is not a valid integer
+   */
+  private def positionOrIntervalStart(s: String): Int = {
+    // begin the search at index 1 so that a leading sign stays attached to the number
+    val separator = s.indexOf('-', 1)
+    val start = if (separator < 0) s else s.substring(0, separator)
+    Integer.parseInt(start)
+  }
+
   /**
    * Split a single or fractional value into optional numerator and denominator values.
    *
@@ -96,8 +114,8 @@ private[adam] object TranscriptEffectConverter extends Serializable with Logging
     val tokens = s.split("/")
     tokens.length match {
       case 0 => (None, None)
-      case 1 => (Some(Integer.parseInt(tokens(0))), None)
-      case _ => (Some(Integer.parseInt(tokens(0))), Some(Integer.parseInt(tokens(1))))
+      case 1 => (Some(positionOrIntervalStart(tokens(0))), None)
+      case _ => (Some(positionOrIntervalStart(tokens(0))), Some(Integer.parseInt(tokens(1))))
     }
   }
 
diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/converters/TranscriptEffectConverterSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/converters/TranscriptEffectConverterSuite.scala
index 8ab3b923df..ef49b5a56d 100644
--- a/adam-core/src/test/scala/org/bdgenomics/adam/converters/TranscriptEffectConverterSuite.scala
+++ b/adam-core/src/test/scala/org/bdgenomics/adam/converters/TranscriptEffectConverterSuite.scala
@@ -35,6 +35,7 @@ class TranscriptEffectConverterSuite extends ADAMFunSuite {
   final val INVALID_NUMBER = "T|upstream_gene_variant||TAS1R3|ENSG00000169962|transcript|ENST00000339381.5|protein_coding|1/2|c.-485C>T|||4|1/42|not a number|"
   final val INVALID_FRACTION = "T|upstream_gene_variant||TAS1R3|ENSG00000169962|transcript|ENST00000339381.5|protein_coding|not a number/2|c.-485C>T|||4|1/42|453|"
   final val VALID = "T|upstream_gene_variant||TAS1R3|ENSG00000169962|transcript|ENST00000339381.5|protein_coding|1/2|c.-485C>T|||4|1/42|453|"
+  final val VEP_POSITION = "T|upstream_gene_variant||TAS1R3|ENSG00000169962|transcript|ENST00000339381.5|protein_coding|1/2|c.-485C>T|||4-5/420|1/42|453|"
   final val DIFFERENT_ALT = "A|upstream_gene_variant||TAS1R3|ENSG00000169962|transcript|ENST00000339381.5|protein_coding|1/2|c.-485C>T|||4|1/42|453|"
 
   var variant: Variant = null
@@ -170,6 +171,16 @@ class TranscriptEffectConverterSuite extends ADAMFunSuite {
     })
   }
 
+  test("parse VCF ANN attribute with Ensembl VEP position attribute") {
+    val ann = TranscriptEffectConverter.parseAnn(Seq(VEP_POSITION), ValidationStringency.STRICT)
+    assert(ann.length == 1)
+
+    val te = ann.head
+    assert(te.getAlternateAllele == "T")
+    assert(te.getCdsPosition == 4)
+    assert(te.getCdsLength == 420)
+  }
+
   test("convert to transcript effect from null VCF ANN attribute in variant context") {
     when(variantContext.getAttributeAsList("ANN")).thenReturn(null)
 