diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index ad468f657..678a65024 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -183,8 +183,13 @@ public class GenotypingEngine { } @Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"}) - public List>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine, final ArrayList haplotypes, final byte[] ref, final GenomeLoc refLoc, - final GenomeLoc activeRegionWindow, final GenomeLocParser genomeLocParser, final ArrayList activeAllelesToGenotype ) { + public List>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine, + final ArrayList haplotypes, + final byte[] ref, + final GenomeLoc refLoc, + final GenomeLoc activeRegionWindow, + final GenomeLocParser genomeLocParser, + final ArrayList activeAllelesToGenotype ) { final ArrayList>>> returnCalls = new ArrayList>>>(); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index a87703423..9b8d1b3d7 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -30,7 +30,10 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { - HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "ff370c42c8b09a29f1aeff5ac57c7ea6"); + // TODO -- Ryan, do you know why the md5s changed just for the rank sum tests? + final String RyansMd5 = "ff370c42c8b09a29f1aeff5ac57c7ea6"; + final String EricsMd5 = "d8317f4589e8e0c48bcd087cdb75ce88"; + HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", EricsMd5); } private void HCTestComplexVariants(String bam, String args, String md5) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index dd1eea8a4..1b75a2c70 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -170,31 +170,33 @@ public class VariantContextAdaptors { final byte refBaseForIndel = ref.getBases()[index]; - Allele refAllele; - if ( dbsnp.getNCBIRefBase().equals("-") ) - refAllele = Allele.create(refBaseForIndel); - else if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) ) - return null; - else - refAllele = Allele.create(refBaseForIndel + dbsnp.getNCBIRefBase(), true); - boolean addPaddingBase; if ( isSNP(dbsnp) || isMNP(dbsnp) ) addPaddingBase = false; else if ( isIndel(dbsnp) || dbsnp.getVariantType().contains("mixed") ) - addPaddingBase = true; + addPaddingBase = VariantContextUtils.requiresPaddingBase(stripNullDashes(getAlleleList(dbsnp))); else return null; // can't handle anything else + Allele refAllele; + if ( dbsnp.getNCBIRefBase().equals("-") ) + refAllele = Allele.create(refBaseForIndel, true); + else if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) ) + return null; + else + refAllele = Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + dbsnp.getNCBIRefBase(), true); + final List alleles = new ArrayList(); alleles.add(refAllele); // add all of the alt alleles for ( String alt : getAlternateAlleleList(dbsnp) ) { - if ( ! Allele.acceptableAlleleBases(alt) ) { + if ( Allele.wouldBeNullAllele(alt.getBytes())) + alt = ""; + else if ( ! Allele.acceptableAlleleBases(alt) ) return null; - } - alleles.add(Allele.create((addPaddingBase ? refBaseForIndel : "") + alt, false)); + + alleles.add(Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + alt, false)); } final VariantContextBuilder builder = new VariantContextBuilder(); @@ -203,6 +205,17 @@ public class VariantContextAdaptors { builder.alleles(alleles); return builder.make(); } + + private static List stripNullDashes(final List alleles) { + final List newAlleles = new ArrayList(alleles.size()); + for ( final String allele : alleles ) { + if ( allele.equals("-") ) + newAlleles.add(""); + else + newAlleles.add(allele); + } + return newAlleles; + } } // -------------------------------------------------------------------------------------------------------------- diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java index 9676704c2..9d96dedef 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java @@ -25,6 +25,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; import java.io.PrintStream; import java.util.ArrayList; +import java.util.Arrays; import java.util.LinkedList; import java.util.List; @@ -262,20 +263,33 @@ public class ValidationAmplicons extends RodWalker { sequenceInvalid = true; invReason.add("SITE_IS_FILTERED"); } + + String refString = validate.getReference().getDisplayString(); + String altString = validate.getAlternateAllele(0).getDisplayString(); + if ( validate.isIndel() ) { sequence.append(Character.toUpperCase((char)ref.getBase())); rawSequence.append(Character.toUpperCase((char)ref.getBase())); + final byte[] refAllele = validate.getReference().getBases(); + refString = new String(Arrays.copyOfRange(refAllele, 1, refAllele.length)); + if ( refString.isEmpty() ) + refString = "-"; + final byte[] altAllele = validate.getAlternateAllele(0).getBases(); + altString = new String(Arrays.copyOfRange(altAllele, 1, altAllele.length)); + if ( altString.isEmpty() ) + altString = "-"; } + sequence.append('['); - sequence.append(validate.getAlternateAllele(0).toString()); + sequence.append(altString); sequence.append('/'); - sequence.append(validate.getReference().toString()); + sequence.append(refString); sequence.append(']'); // do this to the raw sequence to -- the indeces will line up that way rawSequence.append('['); - rawSequence.append(validate.getAlternateAllele(0).getBaseString()); + rawSequence.append(altString); rawSequence.append('/'); - rawSequence.append(validate.getReference().getBaseString()); + rawSequence.append(refString); rawSequence.append(']'); allelePos = ref.getLocus(); if ( indelCounter > 0 ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index adf30146f..b73a498bc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -381,7 +381,7 @@ public class VariantsToTable extends RodWalker { getters.put("REF", new Getter() { public String get(VariantContext vc) { StringBuilder x = new StringBuilder(); - x.append(vc.getReference()); + x.append(vc.getReference().getDisplayString()); return x.toString(); } }); diff --git a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java index 143a053c9..54442622f 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java @@ -176,7 +176,7 @@ public class Haplotype { newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii]; } } else if( refAllele.length() < altAllele.length() ) { // insertion - final int altAlleleLength = altAllele.length(); + final int altAlleleLength = altAllele.length() - 1; newHaplotype = new byte[bases.length + altAlleleLength]; for( int iii = 0; iii < bases.length; iii++ ) { newHaplotype[iii] = bases[iii]; @@ -185,15 +185,16 @@ public class Haplotype { newHaplotype[iii] = newHaplotype[iii-altAlleleLength]; } for( int iii = 0; iii < altAlleleLength; iii++ ) { - newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii]; + newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii+1]; } } else { // deletion final int shift = refAllele.length() - altAllele.length(); + final int altAlleleLength = altAllele.length() - 1; newHaplotype = new byte[bases.length - shift]; - for( int iii = 0; iii < haplotypeInsertLocation + altAllele.length(); iii++ ) { + for( int iii = 0; iii < haplotypeInsertLocation + altAlleleLength; iii++ ) { newHaplotype[iii] = bases[iii]; } - for( int iii = haplotypeInsertLocation + altAllele.length(); iii < newHaplotype.length; iii++ ) { + for( int iii = haplotypeInsertLocation + altAlleleLength; iii < newHaplotype.length; iii++ ) { newHaplotype[iii] = bases[iii+shift]; } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 72681ae35..979400350 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -1129,6 +1129,11 @@ public class VariantContext implements Feature { // to enable tribble integratio else throw new ReviewedStingException(message); } + } else { + final long length = (stop - start) + 1; + if ( ! hasSymbolicAlleles() && length != getReference().length() ) { + throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this); + } } } @@ -1151,11 +1156,6 @@ public class VariantContext implements Feature { // to enable tribble integratio // make sure there's one reference allele if ( ! alreadySeenRef ) throw new IllegalArgumentException("No reference allele found in VariantContext"); - - final long length = (stop - start) + 1; - if ( ! hasSymbolicAlleles() && length != getReference().length() ) { - throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this); - } } private void validateGenotypes() { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 70d365ef8..a8f956413 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -747,7 +747,7 @@ public class VariantContextUtils { if ( !mappedVCs.containsKey(vc.getType()) ) mappedVCs.put(vc.getType(), new ArrayList()); mappedVCs.get(vc.getType()).add(vc); - } + } } return mappedVCs; @@ -809,10 +809,10 @@ public class VariantContextUtils { // // refAllele: ACGTGA // myRef: ACGT - // myAlt: - + // myAlt: A // // We need to remap all of the alleles in vc to include the extra GA so that - // myRef => refAllele and myAlt => GA + // myRef => refAllele and myAlt => AGA // Allele myRef = vc.getReference(); @@ -1335,6 +1335,35 @@ public class VariantContextUtils { } } + public static boolean requiresPaddingBase(final List alleles) { + + // see whether one of the alleles would be null if trimmed through + + for ( final String allele : alleles ) { + if ( allele.isEmpty() ) + return true; + } + + int clipping = 0; + Character currentBase = null; + + while ( true ) { + for ( final String allele : alleles ) { + if ( allele.length() - clipping == 0 ) + return true; + + char myBase = allele.charAt(clipping); + if ( currentBase == null ) + currentBase = myBase; + else if ( currentBase != myBase ) + return false; + } + + clipping++; + currentBase = null; + } + } + public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) { // TODO - this function doesn't work with mixed records or records that started as mixed and then became non-mixed diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java index 7a849a819..80eda5ed9 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java @@ -23,7 +23,7 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest { testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF; testArgs += " --virtualPrimerSize 30"; WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, - Arrays.asList("27f9450afa132888a8994167f0035fd7")); + Arrays.asList("240d99b58f73985fb114abe9044c0271")); executeTest("Test probes", spec); } @@ -36,7 +36,7 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest { testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF; testArgs += " --virtualPrimerSize 30 --doNotUseBWA"; WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, - Arrays.asList("f2611ff1d9cd5bedaad003251fed8bc1")); + Arrays.asList("6e7789445e29d91979a21e78d3d53295")); executeTest("Test probes", spec); } @@ -49,7 +49,7 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest { testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF; testArgs += " --virtualPrimerSize 30 --filterMonomorphic"; WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, - Arrays.asList("77b3f30e38fedad812125bdf6cf3255f")); + Arrays.asList("18d7236208db603e143b40db06ef2aca")); executeTest("Test probes", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index bbee99ba6..3b60fa2c2 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -98,16 +98,16 @@ public class CombineVariantsIntegrationTest extends WalkerTest { @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "ac58a5fde17661e2a19004ca954d9781", " -setKey null"); } @Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "67a8076e30b4bca0ea5acdc9cd26a4e0"); } // official project VCF files in tabix format - @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "ef2d249ea4b25311966e038aac05c661"); } - @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "cdb448aaa92ca5a9e393d875b42581b3"); } + @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "909c6dc74eeb5ab86f8e74073eb0c1d6"); } + @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "f0c2cb3e3a6160e1ed0ee2fd9b120f55"); } @Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "f0ce3fb83d4ad9ba402d7cb11cd000c3"); } @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "4efdf983918db822e4ac13d911509576"); } // official project VCF files in tabix format @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "848d4408ee953053d2307cefebc6bd6d"); } // official project VCF files in tabix format - @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "91f6087e6e2bf3df4d1c9700eaff958b"); } + @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "4159a0c0d7c15852a3a545e0bea6bbc5"); } - @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "a9be239ab5e03e7e97caef58a3841dd2"); } + @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "61d0ded244895234ac727391f29f13a8"); } @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "0b1815c699e71e143ed129bfadaffbcb"); } diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java index e9b845d59..8f648344d 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java @@ -57,7 +57,7 @@ public class VCFIntegrationTest extends WalkerTest { String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s "; String test1 = baseCommand + "-T SelectVariants -V " + testVCF; - WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("0f82ac11852e7f958c1a0ce52398c2ae")); + WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("38697c195e7abf18d95dcc16c8e6d284")); executeTest("Test reading and writing samtools vcf", spec1); } @@ -66,7 +66,7 @@ public class VCFIntegrationTest extends WalkerTest { String testVCF = privateTestDir + "ex2.vcf"; String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s "; String test1 = baseCommand + "-T SelectVariants -V " + testVCF; - WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("9773d6a121cfcb18d090965bc520f120")); + WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("a04a0fc22fedb516c663e56e51fc1e27")); executeTest("Test writing samtools WEx BCF example", spec1); }