diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/Allele.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/Allele.java index 0d48a4105..60e84ae6d 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/Allele.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/Allele.java @@ -280,7 +280,10 @@ public class Allele implements Comparable { } } - return null; // couldn't find anything + if ( wouldBeNoCallAllele(alleleBases) ) + return NO_CALL; + else + return null; // couldn't find anything } public static List resolveAlleles(List possibleAlleles, List alleleStrings) { diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/Genotype.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/Genotype.java index ea1200bfe..b9bc95298 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/Genotype.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/Genotype.java @@ -114,7 +114,11 @@ public class Genotype { if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0 in setAlleles"); int nNoCalls = 0; - for ( Allele allele : alleles ) { nNoCalls += allele.isNoCall() ? 1 : 0; } + for ( Allele allele : alleles ) { + if ( allele == null ) + throw new IllegalArgumentException("BUG: allele cannot be null in Genotype"); + nNoCalls += allele.isNoCall() ? 1 : 0; + } if ( nNoCalls > 0 && nNoCalls != alleles.size() ) throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this); } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index 570ecf79f..d67f8b23b 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.refdata; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.StingException; import java.util.*; @@ -13,7 +14,7 @@ import java.util.*; * The standard interaction model is: * * Traversal system arrives at a site, which has a bunch of rods covering it - * Traversal calls tracker.bind(name, rod) for each rod in rods +Genotype * Traversal calls tracker.bind(name, rod) for each rod in rods * Traversal passes tracker to the walker * walker calls lookup(name, default) to obtain the rod values at this site, or default if none was * bound at this site. @@ -197,37 +198,106 @@ public class RefMetaDataTracker { } - public Collection getAllVariantContexts(GenomeLoc curLocation) { - return getAllVariantContexts(curLocation, null, false, false); + /** + * Converts all possible ROD tracks to VariantContexts objects, of all types, allowing any start and any number + * of entries per ROD. + */ + public Collection getAllVariantContexts() { + return getAllVariantContexts(null, null, false, false); } - public Collection getAllVariantContexts(GenomeLoc curLocation, EnumSet allowedTypes, boolean requireStartHere, boolean takeFirstOnly ) { + + /** + * Converts all possible ROD tracks to VariantContexts objects. If allowedTypes != null, then only + * VariantContexts in the allow set of types will be returned. If requireStartsHere is true, then curLocation + * must not be null, and only records whose start position is == to curLocation.getStart() will be returned. + * If takeFirstOnly is true, then only a single VariantContext will be converted from any individual ROD. Of course, + * this single object must pass the allowed types and start here options if provided. Note that the result + * may return multiple VariantContexts with the same name if that particular track contained multiple RODs spanning + * the current location. + * + * The name of each VariantContext corresponds to the ROD name. + * + * @param curLocation + * @param allowedTypes + * @param requireStartHere + * @param takeFirstOnly + * @return + */ + public Collection getAllVariantContexts(EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { List contexts = new ArrayList(); for ( RODRecordList rodList : getBoundRodTracks() ) { - for ( ReferenceOrderedDatum rec : rodList.getRecords() ) { - if ( VariantContextAdaptors.canBeConvertedToVariantContext(rec) ) { - // ok, we might actually be able to turn this record in a variant context - VariantContext vc = VariantContextAdaptors.toVariantContext(rodList.getName(), rec); - - // now, let's decide if we want to keep it - boolean goodType = allowedTypes == null || allowedTypes.contains(vc.getType()); - boolean goodPos = ! requireStartHere || rec.getLocation().getStart() == curLocation.getStart(); - - if ( goodType && goodPos ) { // ok, we are going to keep this thing - contexts.add(vc); - - if ( takeFirstOnly ) - // we only want the first passing instance, so break the loop over records in rodList - break; - } - } - } + addVariantContexts(contexts, rodList, allowedTypes, curLocation, requireStartHere, takeFirstOnly); } return contexts; } + /** + * Gets the variant contexts associated with track name name + * + * see getVariantContexts for more information. + * + * @param name + * @param curLocation + * @param allowedTypes + * @param requireStartHere + * @param takeFirstOnly + * @return + */ + public Collection getVariantContexts(String name, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { + RODRecordList rodList = getTrackData(name, null); + Collection contexts = new ArrayList(); + + if ( rodList != null ) + addVariantContexts(contexts, rodList, allowedTypes, curLocation, requireStartHere, takeFirstOnly ); + + return contexts; + } + + /** + * Gets the variant context associated with name, and assumes the system only has a single bound track at this location. Throws an exception if not. + * see getVariantContexts for more information. + * + * @param name + * @param curLocation + * @param allowedTypes + * @param requireStartHere + * @return + */ + public VariantContext getVariantContext(String name, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere ) { + Collection contexts = getVariantContexts(name, allowedTypes, curLocation, requireStartHere, false ); + + if ( contexts.size() > 1 ) + throw new StingException("Requested a single VariantContext object for track " + name + " but multiple variants were present at position " + curLocation); + + return contexts.iterator().next(); + } + + private void addVariantContexts(Collection contexts, RODRecordList rodList, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { + for ( ReferenceOrderedDatum rec : rodList.getRecords() ) { + if ( VariantContextAdaptors.canBeConvertedToVariantContext(rec) ) { + // ok, we might actually be able to turn this record in a variant context + VariantContext vc = VariantContextAdaptors.toVariantContext(rodList.getName(), rec); + + if ( vc == null ) // sometimes the track has odd stuff in it that can't be converted + continue; + + // now, let's decide if we want to keep it + boolean goodType = allowedTypes == null || allowedTypes.contains(vc.getType()); + boolean goodPos = ! requireStartHere || rec.getLocation().getStart() == curLocation.getStart(); + + if ( goodType && goodPos ) { // ok, we are going to keep this thing + contexts.add(vc); + + if ( takeFirstOnly ) + // we only want the first passing instance, so break the loop over records in rodList + break; + } + } + } + } /** @@ -241,10 +311,4 @@ public class RefMetaDataTracker { //logger.debug(String.format("Binding %s to %s", name, rod)); map.put(canonicalName(name), rod); } -/* - public void bind(final String name, ReferenceOrderedDatum rod) { - //logger.debug(String.format("Binding %s to %s", name, rod)); - map.put(canonicalName(name), rod); - } - */ } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index 381eff78a..ede1f4740 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.refdata; import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord; import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeEncoding; import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; +import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; @@ -14,6 +15,13 @@ import java.util.*; * you need to create a adaptor object here and register a converter from your class to this object. When tribble arrives, * we'll use a better approach. * + * To add a new converter: + * + * create a subclass of VCAdaptor, overloading the convert operator + * add it to the static map from input type -> converter where the input type is the object.class you want to convert + * + * That's it + * * @author depristo@broadinstitute.org */ public class VariantContextAdaptors { @@ -141,8 +149,13 @@ public class VariantContextAdaptors { Map genotypes = new HashMap(); for ( VCFGenotypeRecord vcfG : vcf.getVCFGenotypeRecords() ) { List genotypeAlleles = new ArrayList(); - for ( VCFGenotypeEncoding s : vcfG.getAlleles() ) - genotypeAlleles.add(Allele.getMatchingAllele(alleles, s.getBases())); + for ( VCFGenotypeEncoding s : vcfG.getAlleles() ) { + Allele a = Allele.getMatchingAllele(alleles, s.getBases()); + if ( a == null ) + throw new StingException("Invalid VCF genotype allele " + s + " in VCF " + vcf); + + genotypeAlleles.add(a); + } double pError = vcfG.getNegLog10PError() == VCFGenotypeRecord.MISSING_GENOTYPE_QUALITY ? Genotype.NO_NEG_LOG_10PERROR : vcfG.getNegLog10PError(); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestVariantContextWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestVariantContextWalker.java index 2d2d87fdb..665367ee4 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestVariantContextWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestVariantContextWalker.java @@ -33,7 +33,7 @@ public class TestVariantContextWalker extends RodWalker { EnumSet allowedTypes = onlyOfThisType == null ? null : EnumSet.of(onlyOfThisType); int n = 0; - for (VariantContext vc : tracker.getAllVariantContexts(context.getLocation(), allowedTypes, onlyContextsStartinAtCurrentPosition, takeFirstOnly) ) { + for (VariantContext vc : tracker.getAllVariantContexts(allowedTypes, context.getLocation(), onlyContextsStartinAtCurrentPosition, takeFirstOnly) ) { n++; if ( printContexts ) out.printf(" %s%n", vc); } diff --git a/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextIntegrationTest.java new file mode 100755 index 000000000..5437494b8 --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextIntegrationTest.java @@ -0,0 +1,53 @@ +package org.broadinstitute.sting.gatk.contexts.variantcontext; + +import org.broadinstitute.sting.WalkerTest; +import org.junit.Test; + +import java.util.HashMap; +import java.util.Map; +import java.util.Arrays; +import java.util.List; +import java.io.File; + +public class VariantContextIntegrationTest extends WalkerTest { + private static String root = "-T TestVariantContext" + + " -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" + + " -B vcf,VCF,/humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + + " -R " + oneKGLocation + "reference/human_b36_both.fasta"; + + static HashMap expectations = new HashMap(); + static { + expectations.put("-L 1:1-10000 --printPerLocus", "39c035ae756eb176a7baffcd0f0fe0af"); + expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly", "33ab797ac3de900e75fc0d9b01efe482"); + expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsStartinAtCurrentPosition", "38619d704068072a4ccfd354652957a2"); + expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "bf64ab634126382813a6a6b29a5f47d8"); + expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType SNP", "be087f53429974b4e505cd59f9363bfe"); + expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL", "d758adbab9011e42c77d502fe4d62c27"); + expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", "933ec8327192c5ed58a1952c73fb4f73"); + expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType MIXED", "7d5d0283d92220ee78db7465d675b37f"); + expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType NO_VARIATION", "39335acdb34c8a2af433dc50d619bcbc"); + } + + @Test + public void testConversionSelection() { + + for ( Map.Entry entry : expectations.entrySet() ) { + String extraArgs = entry.getKey(); + String md5 = entry.getValue(); + + WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s", + 1, // just one output file + Arrays.asList(md5)); + executeTest("testDbSNPAndVCFConversions", spec); + } + } + + @Test + public void testLargeScaleConversion() { + // this really just tests that we are seeing the same number of objects over all of chr1 + WalkerTestSpec spec = new WalkerTestSpec( root + " -L 1" + " -o %s", + 1, // just one output file + Arrays.asList("3fcdd982df080e6abc0afaba6abdf386")); + executeTest("testLargeScaleConversion", spec); + } +} \ No newline at end of file