From d73c63a99a01c076f155079ad379c957af4ef7b5 Mon Sep 17 00:00:00 2001 From: ebanks Date: Mon, 19 Apr 2010 05:47:17 +0000 Subject: [PATCH] Redoing the conversion to VariantContext: instead of walkers passing in a ref allele, they pass in the ref context and the adaptors create the allele. This is the right way of doing it. Also, adding some more useful integration tests. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3194 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/refdata/RefMetaDataTracker.java | 48 +++--- .../gatk/refdata/VariantContextAdaptors.java | 154 +++++++++++++----- .../sting/gatk/walkers/VariantsToVCF.java | 7 +- .../indels/RealignerTargetCreator.java | 2 +- .../walkers/sequenom/PickSequenomProbes.java | 4 +- .../sequenom/SequenomValidationConverter.java | 34 +--- .../walkers/TestVariantContextWalker.java | 2 +- .../walkers/SnpCallRateByCoverageWalker.java | 7 +- .../AnalyzeAnnotationsWalker.java | 6 +- .../VariantConcordanceROCCurveWalker.java | 2 +- .../variantoptimizer/VariantOptimizer.java | 2 +- .../walkers/VariantsToVCFIntegrationTest.java | 15 ++ .../PickSequenomProbesIntegrationTest.java | 17 ++ 13 files changed, 184 insertions(+), 116 deletions(-) create mode 100755 java/test/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbesIntegrationTest.java diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index 3cc8f3fc9..23db84144 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -1,8 +1,8 @@ package org.broadinstitute.sting.gatk.refdata; import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.utils.GenomeLoc; @@ -113,7 +113,7 @@ public class RefMetaDataTracker { * Get all of the RMDs at the current site. The collection is "flattened": for any track that has multiple records * at the current site, they all will be added to the list as separate elements. * - * @return + * @return collection of all rods */ public Collection getAllRods() { List l = new ArrayList(); @@ -129,7 +129,7 @@ public class RefMetaDataTracker { * Get all of the RMD tracks at the current site. Each track is returned as a single compound * object (RODRecordList) that may contain multiple RMD records associated with the current site. * - * @return + * @return collection of all tracks */ public Collection getBoundRodTracks() { LinkedList bound = new LinkedList(); @@ -176,9 +176,13 @@ public class RefMetaDataTracker { /** * Converts all possible ROD tracks to VariantContexts objects, of all types, allowing any start and any number * of entries per ROD. + * The name of each VariantContext corresponds to the ROD name. + * + * @param ref reference context + * @return variant context */ - public Collection getAllVariantContexts() { - return getAllVariantContexts(null, null, false, false); + public Collection getAllVariantContexts(ReferenceContext ref) { + return getAllVariantContexts(ref, null, null, false, false); } /** @@ -192,17 +196,18 @@ public class RefMetaDataTracker { * * The name of each VariantContext corresponds to the ROD name. * - * @param curLocation location + * @param ref reference context * @param allowedTypes allowed types + * @param curLocation location * @param requireStartHere do we require the rod to start at this location? * @param takeFirstOnly do we take the first rod only? * @return variant context */ - public Collection getAllVariantContexts(EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { + public Collection getAllVariantContexts(ReferenceContext ref, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { List contexts = new ArrayList(); for ( RODRecordList rodList : getBoundRodTracks() ) { - addVariantContexts(contexts, rodList, allowedTypes, curLocation, null, requireStartHere, takeFirstOnly); + addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly); } return contexts; @@ -221,25 +226,25 @@ public class RefMetaDataTracker { * @return variant context */ public Collection getVariantContexts(String name, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { - return getVariantContexts(Arrays.asList(name), allowedTypes, curLocation, null, requireStartHere, takeFirstOnly); + return getVariantContexts(null, Arrays.asList(name), allowedTypes, curLocation, requireStartHere, takeFirstOnly); } - public Collection getVariantContexts(String name, EnumSet allowedTypes, GenomeLoc curLocation, Allele ref, boolean requireStartHere, boolean takeFirstOnly ) { - return getVariantContexts(Arrays.asList(name), allowedTypes, curLocation, ref, requireStartHere, takeFirstOnly); + public Collection getVariantContexts(ReferenceContext ref, String name, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { + return getVariantContexts(ref, Arrays.asList(name), allowedTypes, curLocation, requireStartHere, takeFirstOnly); } public Collection getVariantContexts(Collection names, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { - return getVariantContexts(names, allowedTypes, curLocation, null, requireStartHere, takeFirstOnly); + return getVariantContexts(null, names, allowedTypes, curLocation, requireStartHere, takeFirstOnly); } - public Collection getVariantContexts(Collection names, EnumSet allowedTypes, GenomeLoc curLocation, Allele ref, boolean requireStartHere, boolean takeFirstOnly ) { + public Collection getVariantContexts(ReferenceContext ref, Collection names, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { Collection contexts = new ArrayList(); for ( String name : names ) { RODRecordList rodList = getTrackDataByName(name,true); // require that the name is an exact match if ( rodList != null ) - addVariantContexts(contexts, rodList, allowedTypes, curLocation, ref, requireStartHere, takeFirstOnly ); + addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly ); } return contexts; @@ -267,15 +272,11 @@ public class RefMetaDataTracker { } - private void addVariantContexts(Collection contexts, RODRecordList rodList, EnumSet allowedTypes, GenomeLoc curLocation, Allele ref, boolean requireStartHere, boolean takeFirstOnly ) { + private void addVariantContexts(Collection contexts, RODRecordList rodList, ReferenceContext ref, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { for ( GATKFeature rec : rodList ) { if ( VariantContextAdaptors.canBeConvertedToVariantContext(rec.getUnderlyingObject()) ) { // ok, we might actually be able to turn this record in a variant context - VariantContext vc; - if ( ref == null ) - vc = VariantContextAdaptors.toVariantContext(rodList.getName(), rec.getUnderlyingObject()); - else - vc = VariantContextAdaptors.toVariantContext(rodList.getName(), rec.getUnderlyingObject(), ref); + VariantContext vc = VariantContextAdaptors.toVariantContext(rodList.getName(), rec.getUnderlyingObject(), ref); if ( vc == null ) // sometimes the track has odd stuff in it that can't be converted continue; @@ -304,8 +305,9 @@ public class RefMetaDataTracker { * defaultValue.getLocation() may be not equal to what RODRecordList's location would be expected to be otherwise: * for instance, on locus traversal, location is usually expected to be a single base we are currently looking at, * regardless of the presence of "extended" RODs overlapping with that location). - * @param name - * @return + * @param name track name + * @param requireExactMatch do we require an exact match of the rod name? + * @return track data for the given rod */ private RODRecordList getTrackDataByName(final String name, boolean requireExactMatch) { //logger.debug(String.format("Lookup %s%n", name)); @@ -335,7 +337,7 @@ public class RefMetaDataTracker { /** * Returns the canonical name of the rod name (lowercases it) * @param name the name of the rod - * @return + * @return canonical name of the rod */ private final String canonicalName(final String name) { return name.toLowerCase(); diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index 40e35b956..90a13bf55 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -4,9 +4,11 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableGenotype; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.genotype.CalledGenotype; import org.broadinstitute.sting.utils.genotype.LikelihoodObject; +import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter; import org.broadinstitute.sting.utils.genotype.glf.GLFSingleCall; import org.broadinstitute.sting.utils.genotype.glf.GLFWriter; @@ -56,7 +58,7 @@ public class VariantContextAdaptors { /** generic superclass */ private static abstract class VCAdaptor { abstract VariantContext convert(String name, Object input); - abstract VariantContext convert(String name, Object input, Allele refAllele); + abstract VariantContext convert(String name, Object input, ReferenceContext ref); } public static VariantContext toVariantContext(String name, Object variantContainingObject) { @@ -67,11 +69,11 @@ public class VariantContextAdaptors { } } - public static VariantContext toVariantContext(String name, Object variantContainingObject, Allele refAllele) { + public static VariantContext toVariantContext(String name, Object variantContainingObject, ReferenceContext ref) { if ( ! adaptors.containsKey(variantContainingObject.getClass()) ) return null; else { - return adaptors.get(variantContainingObject.getClass()).convert(name, variantContainingObject, refAllele); + return adaptors.get(variantContainingObject.getClass()).convert(name, variantContainingObject, ref); } } @@ -91,14 +93,15 @@ public class VariantContextAdaptors { private static class RodDBSnpAdaptor extends VCAdaptor { VariantContext convert(String name, Object input) { - if ( ! Allele.acceptableAlleleBases(((rodDbSNP)input).getReference()) ) - return null; - Allele refAllele = new Allele(((rodDbSNP)input).getReference(), true); - return convert(name, input, refAllele); + return convert(name, input, null); } - VariantContext convert(String name, Object input, Allele refAllele) { + VariantContext convert(String name, Object input, ReferenceContext ref) { rodDbSNP dbsnp = (rodDbSNP)input; + if ( ! Allele.acceptableAlleleBases(dbsnp.getReference()) ) + return null; + Allele refAllele = new Allele(dbsnp.getReference(), true); + if ( dbsnp.isSNP() || dbsnp.isIndel() || dbsnp.varType.contains("mixed") ) { // add the reference allele List alleles = new ArrayList(); @@ -132,28 +135,25 @@ public class VariantContextAdaptors { private static class RodVCFAdaptor extends VCAdaptor { VariantContext convert(String name, Object input) { - Allele refAllele = new Allele(((RodVCF)input).getReference(), true); - return vcfToVariantContext(name, ((RodVCF)input).getRecord(), refAllele); + return vcfToVariantContext(name, ((RodVCF)input).getRecord(), null); } - VariantContext convert(String name, Object input, Allele refAllele) { - return vcfToVariantContext(name, ((RodVCF)input).getRecord(), refAllele); + VariantContext convert(String name, Object input, ReferenceContext ref) { + return vcfToVariantContext(name, ((RodVCF)input).getRecord(), ref); } } private static class VCFRecordAdaptor extends VCAdaptor { VariantContext convert(String name, Object input) { - Allele refAllele = new Allele(((VCFRecord)input).getReference(), true); - return vcfToVariantContext(name, (VCFRecord)input, refAllele); + return vcfToVariantContext(name, (VCFRecord)input, null); } - VariantContext convert(String name, Object input, Allele refAllele) { - return vcfToVariantContext(name, (VCFRecord)input, refAllele); + VariantContext convert(String name, Object input, ReferenceContext ref) { + return vcfToVariantContext(name, (VCFRecord)input, ref); } } - - private static VariantContext vcfToVariantContext(String name, VCFRecord vcf, Allele refAllele) { + private static VariantContext vcfToVariantContext(String name, VCFRecord vcf, ReferenceContext ref) { if ( vcf.isReference() || vcf.isSNP() || vcf.isIndel() ) { // add the reference allele if ( ! Allele.acceptableAlleleBases(vcf.getReference()) ) { @@ -167,7 +167,9 @@ public class VariantContextAdaptors { // add all of the alt alleles List alleles = new ArrayList(); + Allele refAllele = determineRefAllele(vcf, ref); alleles.add(refAllele); + for ( String alt : vcf.getAlternateAlleleList() ) { if ( ! Allele.acceptableAlleleBases(alt) ) { //System.out.printf("Excluding vcf record %s%n", vcf); @@ -184,10 +186,14 @@ public class VariantContextAdaptors { List genotypeAlleles = new ArrayList(); for ( VCFGenotypeEncoding s : vcfG.getAlleles() ) { Allele a = Allele.getMatchingAllele(alleles, s.getBases()); - if ( a == null ) - throw new StingException("Invalid VCF genotype allele " + s + " in VCF " + vcf); - - genotypeAlleles.add(a); + if ( a == null ) { + if ( vcf.getType() == Variation.VARIANT_TYPE.INSERTION || vcf.getType() == Variation.VARIANT_TYPE.DELETION ) + genotypeAlleles.add(refAllele); + else + throw new StingException("Invalid VCF genotype allele " + s + " in VCF " + vcf); + } else { + genotypeAlleles.add(a); + } } Map fields = new HashMap(); @@ -207,13 +213,45 @@ public class VariantContextAdaptors { } double qual = vcf.isMissingQual() ? VariantContext.NO_NEG_LOG_10PERROR : vcf.getNegLog10PError(); - VariantContext vc = new VariantContext(name, vcf.getLocation(), alleles, genotypes, qual, filters, attributes); + + GenomeLoc loc = vcf.getLocation(); + if ( vcf.isDeletion() ) + loc = GenomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart(), loc.getStart()+refAllele.length()-1); + + VariantContext vc = new VariantContext(name, loc, alleles, genotypes, qual, filters, attributes); vc.validate(); return vc; } else return null; // can't handle anything else } + private static Allele determineRefAllele(VCFRecord vcf, ReferenceContext ref) { + Allele refAllele; + if ( ref == null ) { + refAllele = new Allele(vcf.getReference(), true); + } else if ( !vcf.isIndel() ) { + refAllele = new Allele(Character.toString(ref.getBase()), true); + } else if ( vcf.isInsertion() ) { + refAllele = new Allele(Allele.NULL_ALLELE_STRING, true); + } else if ( vcf.isDeletion() ) { + int start = (int)(ref.getLocus().getStart() - ref.getWindow().getStart() + 1); + int delLength = 0; + for ( VCFGenotypeEncoding enc : vcf.getAlternateAlleles() ) { + if ( enc.getLength() > delLength ) + delLength = enc.getLength(); + } + if ( delLength > ref.getWindow().getStop() - ref.getLocus().getStop() ) + throw new IllegalArgumentException("Length of deletion is larger than reference context provided at " + ref.getLocus()); + + char[] deletion = new char[delLength]; + System.arraycopy(ref.getBases(), start, deletion, 0, delLength); + refAllele = new Allele(String.valueOf(deletion), true); + } else { + throw new UnsupportedOperationException("Conversion of VCF type " + vcf.getType() + " is not supported."); + } + + return refAllele; + } public static VCFHeader createVCFHeader(Set hInfo, VariantContext vc) { HashSet names = new LinkedHashSet(); @@ -364,11 +402,15 @@ public class VariantContextAdaptors { private static List calcVCFGenotypeKeys(VariantContext vc) { Set keys = new HashSet(); + boolean sawGoodQual = false; for ( Genotype g : vc.getGenotypes().values() ) { keys.addAll(g.getAttributes().keySet()); + if ( g.hasNegLog10PError() ) + sawGoodQual = true; } - keys.add(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY); + if ( sawGoodQual ) + keys.add(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY); return Utils.sorted(new ArrayList(keys)); } @@ -380,13 +422,17 @@ public class VariantContextAdaptors { private static class PlinkRodAdaptor extends VCAdaptor { VariantContext convert(String name, Object input) { - throw new UnsupportedOperationException("Conversion from Plink to VC requires a reference allele"); + return convert(name, input, null); } - VariantContext convert(String name, Object input, Allele refAllele) { + VariantContext convert(String name, Object input, ReferenceContext ref) { + if ( ref == null ) + throw new UnsupportedOperationException("Conversion from Plink to VariantContext requires a reference context"); + PlinkRod plink = (PlinkRod)input; HashSet VCAlleles = new HashSet(); + Allele refAllele = determineRefAllele(plink, ref); VCAlleles.add(refAllele); // mapping from Plink Allele to VC Allele (since the PlinkRod does not annotate any of the Alleles as reference) @@ -440,6 +486,23 @@ public class VariantContextAdaptors { throw new IllegalArgumentException(e.getMessage() + "; please make sure that e.g. a sample isn't present more than one time in your ped file"); } } + + private Allele determineRefAllele(PlinkRod plink, ReferenceContext ref) { + Allele refAllele; + if ( !plink.isIndel() ) { + refAllele = new Allele(Character.toString(ref.getBase()), true); + } else if ( plink.isInsertion() ) { + refAllele = new Allele(Allele.NULL_ALLELE_STRING, true); + } else { + long maxLength = ref.getWindow().getStop() - ref.getLocus().getStop(); + if ( plink.getLength() > maxLength ) + throw new UnsupportedOperationException("Plink conversion currently can only handle indels up to length " + maxLength); + char[] deletion = new char[plink.getLength()]; + System.arraycopy(ref.getBases(), 1, deletion, 0, plink.getLength()); + refAllele = new Allele(String.valueOf(deletion), true); + } + return refAllele; + } } // -------------------------------------------------------------------------------------------------------------- @@ -455,22 +518,23 @@ public class VariantContextAdaptors { * @return a VariantContext object */ VariantContext convert(String name, Object input) { - if ( ! Allele.acceptableAlleleBases(((RodGLF)input).getReference()) ) - return null; - Allele refAllele = new Allele(((RodGLF)input).getReference(), true); - return convert(name, input, refAllele); + return convert(name, input, null); } /** * convert to a Variant Context, given: * @param name the name of the ROD * @param input the Rod object, in this case a RodGLF - * @param refAllele the reference base as an Allele object + * @param ref the reference context * @return a VariantContext object */ - VariantContext convert(String name, Object input, Allele refAllele) { + VariantContext convert(String name, Object input, ReferenceContext ref) { RodGLF glf = (RodGLF)input; + if ( ! Allele.acceptableAlleleBases(glf.getReference()) ) + return null; + Allele refAllele = new Allele(glf.getReference(), true); + // make sure we can convert it if ( glf.isSNP() || glf.isIndel()) { // add the reference allele @@ -528,21 +592,21 @@ public class VariantContextAdaptors { * @return a VariantContext object */ VariantContext convert(String name, Object input) { - if ( ! Allele.acceptableAlleleBases(((RodGeliText)input).getReference()) ) - return null; - Allele refAllele = new Allele(((RodGeliText)input).getReference(), true); - return convert(name, input, refAllele); + return convert(name, input, null); } /** * convert to a Variant Context, given: - * @param name the name of the ROD + * @param name the name of the ROD * @param input the Rod object, in this case a RodGeliText - * @param refAllele the reference base as an Allele object + * @param ref the reference context * @return a VariantContext object */ - VariantContext convert(String name, Object input, Allele refAllele) { + VariantContext convert(String name, Object input, ReferenceContext ref) { RodGeliText geli = (RodGeliText)input; + if ( ! Allele.acceptableAlleleBases(geli.getReference()) ) + return null; + Allele refAllele = new Allele(geli.getReference(), true); // make sure we can convert it if ( geli.isSNP() || geli.isIndel()) { @@ -593,21 +657,25 @@ public class VariantContextAdaptors { * @return a VariantContext object */ VariantContext convert(String name, Object input) { - throw new UnsupportedOperationException("Conversion from HapMap to VariantContext requires knowledge of the reference allele"); + return convert(name, input, null); } /** * convert to a Variant Context, given: - * @param name the name of the ROD + * @param name the name of the ROD * @param input the Rod object, in this case a RodGeliText - * @param refAllele the reference base as an Allele object + * @param ref the reference context * @return a VariantContext object */ - VariantContext convert(String name, Object input, Allele refAllele) { + VariantContext convert(String name, Object input, ReferenceContext ref) { + if ( ref == null ) + throw new UnsupportedOperationException("Conversion from HapMap to VariantContext requires a reference context"); + HapMapROD hapmap = (HapMapROD)input; // add the reference allele HashSet alleles = new HashSet(); + Allele refAllele = new Allele(Character.toString(ref.getBase()), true); alleles.add(refAllele); // make a mapping from sample to genotype diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java b/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java index 251100634..ad349995b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.utils.cmdLine.Argument; @@ -15,6 +14,7 @@ import java.util.*; * Converts variants from other file formats to VCF format. */ @Requires(value={},referenceMetaData=@RMD(name=VariantsToVCF.INPUT_ROD_NAME,type= ReferenceOrderedDatum.class)) +@Reference(window=@Window(start=0,stop=40)) public class VariantsToVCF extends RodWalker { public static final String INPUT_ROD_NAME = "variant"; @@ -27,7 +27,7 @@ public class VariantsToVCF extends RodWalker { // Don't allow mixed types for now private EnumSet ALLOWED_VARIANT_CONTEXT_TYPES = EnumSet.of(VariantContext.Type.SNP, VariantContext.Type.NO_VARIATION, VariantContext.Type.INDEL); - private String[] ALLOWED_FORMAT_FIELDS = {"GT"}; + private String[] ALLOWED_FORMAT_FIELDS = {VCFGenotypeRecord.GENOTYPE_KEY, VCFGenotypeRecord.GENOTYPE_QUALITY_KEY, VCFGenotypeRecord.DEPTH_KEY, VCFGenotypeRecord.GENOTYPE_POSTERIORS_TRIPLET_KEY }; public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if ( tracker == null || !BaseUtils.isRegularBase(ref.getBase()) ) @@ -35,8 +35,7 @@ public class VariantsToVCF extends RodWalker { rodDbSNP dbsnp = rodDbSNP.getFirstRealSNP(tracker.getReferenceMetaData(rodDbSNP.STANDARD_DBSNP_TRACK_NAME)); - Allele refAllele = new Allele(Character.toString(ref.getBase()), true); - Collection contexts = tracker.getVariantContexts(INPUT_ROD_NAME, ALLOWED_VARIANT_CONTEXT_TYPES, context.getLocation(), refAllele, true, false); + Collection contexts = tracker.getVariantContexts(ref, INPUT_ROD_NAME, ALLOWED_VARIANT_CONTEXT_TYPES, context.getLocation(), true, false); for ( VariantContext vc : contexts ) { VCFRecord vcf = VariantContextAdaptors.toVCF(vc, ref.getBase(), Arrays.asList(ALLOWED_FORMAT_FIELDS), false, false); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java index 13c70729a..88634e8d2 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java @@ -73,7 +73,7 @@ public class RealignerTargetCreator extends LocusWalker { String refBase = String.valueOf(ref.getBase()); - Collection VCs = tracker.getAllVariantContexts(); + Collection VCs = tracker.getAllVariantContexts(ref); if ( VCs.size() == 0 ) return ""; @@ -111,7 +111,7 @@ public class PickSequenomProbes extends RodWalker { else if ( vc.isInsertion() ) assay_sequence = leading_bases + refBase + "[-/" + vc.getAlternateAllele(0).toString() + "]" + trailing_bases; else if ( vc.isDeletion() ) - assay_sequence = leading_bases + refBase + "[" + vc.getReference().toString() + "/-]" + trailing_bases.substring(vc.getReference().length()); + assay_sequence = leading_bases + refBase + "[" + new String(vc.getReference().getBases()) + "/-]" + trailing_bases.substring(vc.getReference().length()); else return ""; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverter.java b/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverter.java index b330d7c45..4a5cec21d 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverter.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverter.java @@ -33,9 +33,6 @@ public class SequenomValidationConverter extends RodWalker { // "used for smart Hardy-Weinberg annotation",required = false) //private File popFile = null; - // max allowable indel size (based on ref window) - private static final int MAX_INDEL_SIZE = 40; - // sample names private TreeSet sampleNames = null; @@ -74,10 +71,7 @@ public class SequenomValidationConverter extends RodWalker { Object rod = rods.get(0); - // determine the reference allele - Allele refAllele = determineRefAllele(rod, ref); - - VariantContext vc = VariantContextAdaptors.toVariantContext("sequenom", rod, refAllele); + VariantContext vc = VariantContextAdaptors.toVariantContext("sequenom", rod, ref); if ( sampleNames == null ) sampleNames = new TreeSet(vc.getSampleNames()); @@ -85,32 +79,6 @@ public class SequenomValidationConverter extends RodWalker { return addVariantInformationToCall(ref, vc, rod); } - private Allele determineRefAllele(Object rod, ReferenceContext ref) { - Allele refAllele; - - // ugly hack to get around the fact that the Plink rod needs - // a very specific determination of the reference allele - if ( rod instanceof PlinkRod ) { - PlinkRod plink = (PlinkRod)rod; - if ( !plink.isIndel() ) { - refAllele = new Allele(Character.toString(ref.getBase()), true); - } else if ( plink.isInsertion() ) { - refAllele = new Allele(PlinkRod.SEQUENOM_NO_BASE, true); - } else { - if ( plink.getLength() > MAX_INDEL_SIZE ) - throw new UnsupportedOperationException("PlinkToVCF currently can only handle indels up to length " + MAX_INDEL_SIZE); - char[] deletion = new char[plink.getLength()]; - System.arraycopy(ref.getBases(), 1, deletion, 0, plink.getLength()); - refAllele = new Allele(new String(deletion), true); - } - } else { - refAllele = new Allele(Character.toString(ref.getBase()), true); - } - - return refAllele; - } - - public Integer reduce(VCFRecord call, Integer numVariants) { if ( call != null ) { numVariants++; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestVariantContextWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestVariantContextWalker.java index e9bd120ff..839381aa1 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestVariantContextWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestVariantContextWalker.java @@ -45,7 +45,7 @@ public class TestVariantContextWalker extends RodWalker { EnumSet allowedTypes = onlyOfThisType == null ? null : EnumSet.of(onlyOfThisType); int n = 0; - for (VariantContext vc : tracker.getAllVariantContexts(allowedTypes, context.getLocation(), onlyContextsStartinAtCurrentPosition, takeFirstOnly) ) { + for (VariantContext vc : tracker.getAllVariantContexts(ref, allowedTypes, context.getLocation(), onlyContextsStartinAtCurrentPosition, takeFirstOnly) ) { if ( writer != null && n == 0 ) { if ( ! wroteHeader ) { writer.writeHeader(VariantContextAdaptors.createVCFHeader(null, vc)); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SnpCallRateByCoverageWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SnpCallRateByCoverageWalker.java index 635d6e79d..39a40c093 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SnpCallRateByCoverageWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SnpCallRateByCoverageWalker.java @@ -6,9 +6,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.RodGenotypeChipAsGFF; import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyper; import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; @@ -18,7 +16,6 @@ import org.broadinstitute.sting.utils.cmdLine.Argument; import java.util.ArrayList; import java.util.List; -import java.util.Map; import java.util.Collection; @@ -49,12 +46,12 @@ public class SnpCallRateByCoverageWalker extends LocusWalker, Strin return (BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 && context.getBasePileup().size() != 0 && tracker != null && - tracker.getAllVariantContexts() != null + tracker.getAllVariantContexts(ref) != null ); } public List map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - Collection contexts = tracker.getAllVariantContexts(); + Collection contexts = tracker.getAllVariantContexts(ref); for (VariantContext vc : contexts) { if (vc.isVariant() && !vc.isFiltered()) { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java index 7181d1988..0dbbfafb8 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java @@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.cmdLine.Argument; import java.util.HashMap; +import java.util.Collection; /* * Copyright (c) 2010 The Broad Institute @@ -100,11 +101,12 @@ public class AnalyzeAnnotationsWalker extends RodWalker { public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { if( tracker != null ) { + Collection VCs = tracker.getAllVariantContexts(ref); // First find out if this variant is in the truth sets boolean isInTruthSet = false; boolean isTrueVariant = false; - for ( VariantContext vc : tracker.getAllVariantContexts() ) { + for ( VariantContext vc : VCs ) { if( vc.getName().toUpperCase().startsWith("TRUTH") ) { isInTruthSet = true; if (vc.isVariant()) @@ -113,7 +115,7 @@ public class AnalyzeAnnotationsWalker extends RodWalker { } // Add each annotation in this VCF Record to the dataManager - for ( VariantContext vc : tracker.getAllVariantContexts() ) { + for ( VariantContext vc : VCs ) { if( !vc.getName().toUpperCase().startsWith("TRUTH") ) { if( vc.isVariant() ) { dataManager.addAnnotations( vc, ref.getBase(), SAMPLE_NAME, isInTruthSet, isTrueVariant ); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantConcordanceROCCurveWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantConcordanceROCCurveWalker.java index 525a5fa30..90b309963 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantConcordanceROCCurveWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantConcordanceROCCurveWalker.java @@ -126,7 +126,7 @@ public class VariantConcordanceROCCurveWalker extends RodWalker double annotationValues[] = new double[numAnnotations]; - for( final VariantContext vc : tracker.getAllVariantContexts(null, context.getLocation(), false, false) ) { + for( final VariantContext vc : tracker.getAllVariantContexts(ref, null, context.getLocation(), false, false) ) { if( vc != null && vc.isSNP() ) { if( !vc.isFiltered() || IGNORE_ALL_INPUT_FILTERS || (ignoreInputFilterSet != null && ignoreInputFilterSet.containsAll(vc.getFilters())) ) { diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/VariantsToVCFIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/VariantsToVCFIntegrationTest.java index 73e66051d..6be807945 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/VariantsToVCFIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/VariantsToVCFIntegrationTest.java @@ -66,4 +66,19 @@ public class VariantsToVCFIntegrationTest extends WalkerTest { md5); executeTest("testVariantsToVCFUsingHapMapInput", spec).getFirst(); } + + @Test + public void testGenotypesToVCFUsingVCFInput() { + List md5 = new ArrayList(); + md5.add("92d661a3789e55078197666eb9ee7020"); + + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + oneKGLocation + "reference/human_b36_both.fasta" + + " -B variant,VCF," + validationDataLocation + "complexExample.vcf" + + " -T VariantsToVCF" + + " -o %s", + 1, // just one output file + md5); + executeTest("testVariantsToVCFUsingVCFInput", spec).getFirst(); + } } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbesIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbesIntegrationTest.java new file mode 100755 index 000000000..ae9462dd1 --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbesIntegrationTest.java @@ -0,0 +1,17 @@ +package org.broadinstitute.sting.gatk.walkers.sequenom; + +import org.broadinstitute.sting.WalkerTest; +import org.junit.Test; + +import java.util.Arrays; + +public class PickSequenomProbesIntegrationTest extends WalkerTest { + @Test + public void testProbes() { + String testVCF = validationDataLocation + "complexExample.vcf"; + String testArgs = "-R "+oneKGLocation+"reference/human_b36_both.fasta -T PickSequenomProbes -L 1:10,000,000-11,000,000 -B input,VCF,"+testVCF+" -o %s"; + WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, + Arrays.asList("6b5409cc78960f1be855536ed89ea9dd")); + executeTest("Test probes", spec); + } +} \ No newline at end of file