diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompareBAM.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompareBAM.java index f8c9c5396..9809709a8 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompareBAM.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompareBAM.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.compression.reducereads; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter; @@ -11,6 +12,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.ReadFilters; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.util.HashMap; import java.util.Map; @@ -39,6 +41,7 @@ import java.util.Map; * @since 10/30/11 */ +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) @ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class}) public class CompareBAM extends LocusWalker, CompareBAM.TestResults> { @Argument(required = true, shortName = "rr", fullName = "reduced_readgroup", doc = "The read group ID corresponding to the compressed BAM being tested") public String reducedReadGroupID; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsCalculationModel.java index df4aad0eb..685091678 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsCalculationModel.java @@ -90,7 +90,6 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi return new VariantContextBuilder("pc",referenceSampleVC.getChr(), referenceSampleVC.getStart(), referenceSampleVC.getEnd(), referenceSampleVC.getAlleles()) - .referenceBaseForIndel(referenceSampleVC.getReferenceBaseForIndel()) .genotypes(new GenotypeBuilder(UAC.referenceSampleName, referenceAlleles).GQ(referenceGenotype.getGQ()).make()) .make(); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoodsCalculationModel.java index a15c9d7da..1fef76116 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoodsCalculationModel.java @@ -42,7 +42,6 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi private static final int MAX_NUM_ALLELES_TO_GENOTYPE = 4; private PairHMMIndelErrorModel pairModel; - private boolean allelesArePadded = false; /* private static ThreadLocal>> indelLikelihoodMap = new ThreadLocal>>() { @@ -88,12 +87,10 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi final List allAllelesToUse){ - final Pair,Boolean> pair = IndelGenotypeLikelihoodsCalculationModel.getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC,true); - List alleles = pair.first; + List alleles = IndelGenotypeLikelihoodsCalculationModel.getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC,true); if (alleles.size() > MAX_NUM_ALLELES_TO_GENOTYPE) alleles = alleles.subList(0,MAX_NUM_ALLELES_TO_GENOTYPE); - allelesArePadded = pair.second; if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) { IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap().clear(); haplotypeMap.clear(); @@ -121,6 +118,6 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi protected int getEndLocation(final RefMetaDataTracker tracker, final ReferenceContext ref, final List allelesToUse) { - return IndelGenotypeLikelihoodsCalculationModel.computeEndLocation(allelesToUse, ref.getLocus(), allelesArePadded); + return ref.getLocus().getStart() + allelesToUse.get(0).length() - 1; } } \ No newline at end of file diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index 9df92ba7a..5fc466336 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -33,7 +33,6 @@ import org.apache.commons.lang.ArrayUtils; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext; import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.codecs.vcf.VCFAlleleClipper; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.variantcontext.*; @@ -184,8 +183,13 @@ public class GenotypingEngine { } @Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"}) - public List>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine, final ArrayList haplotypes, final byte[] ref, final GenomeLoc refLoc, - final GenomeLoc activeRegionWindow, final GenomeLocParser genomeLocParser, final ArrayList activeAllelesToGenotype ) { + public List>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine, + final ArrayList haplotypes, + final byte[] ref, + final GenomeLoc refLoc, + final GenomeLoc activeRegionWindow, + final GenomeLocParser genomeLocParser, + final ArrayList activeAllelesToGenotype ) { final ArrayList>>> returnCalls = new ArrayList>>>(); @@ -423,24 +427,21 @@ public class GenotypingEngine { protected static VariantContext createMergedVariantContext( final VariantContext thisVC, final VariantContext nextVC, final byte[] ref, final GenomeLoc refLoc ) { final int thisStart = thisVC.getStart(); final int nextStart = nextVC.getStart(); - byte[] refBases = ( thisVC.hasReferenceBaseForIndel() ? new byte[]{ thisVC.getReferenceBaseForIndel() } : new byte[]{} ); - byte[] altBases = ( thisVC.hasReferenceBaseForIndel() ? new byte[]{ thisVC.getReferenceBaseForIndel() } : new byte[]{} ); + byte[] refBases = new byte[]{}; + byte[] altBases = new byte[]{}; refBases = ArrayUtils.addAll(refBases, thisVC.getReference().getBases()); altBases = ArrayUtils.addAll(altBases, thisVC.getAlternateAllele(0).getBases()); - for( int locus = thisStart + refBases.length; locus < nextStart; locus++ ) { + int locus; + for( locus = thisStart + refBases.length; locus < nextStart; locus++ ) { final byte refByte = ref[locus - refLoc.getStart()]; refBases = ArrayUtils.add(refBases, refByte); altBases = ArrayUtils.add(altBases, refByte); } - if( nextVC.hasReferenceBaseForIndel() ) { - refBases = ArrayUtils.add(refBases, nextVC.getReferenceBaseForIndel()); - altBases = ArrayUtils.add(altBases, nextVC.getReferenceBaseForIndel()); - } - refBases = ArrayUtils.addAll(refBases, nextVC.getReference().getBases()); + refBases = ArrayUtils.addAll(refBases, ArrayUtils.subarray(nextVC.getReference().getBases(), locus > nextStart ? 1 : 0, nextVC.getReference().getBases().length)); // special case of deletion including the padding base of consecutive indel altBases = ArrayUtils.addAll(altBases, nextVC.getAlternateAllele(0).getBases()); int iii = 0; - if( refBases.length == altBases.length && VCFAlleleClipper.needsPadding(thisVC) ) { // special case of insertion + deletion of same length creates an MNP --> trim padding bases off the allele + if( refBases.length == altBases.length ) { // special case of insertion + deletion of same length creates an MNP --> trim padding bases off the allele while( iii < refBases.length && refBases[iii] == altBases[iii] ) { iii++; } } final ArrayList mergedAlleles = new ArrayList(); @@ -533,10 +534,10 @@ public class GenotypingEngine { final int elementLength = ce.getLength(); switch( ce.getOperator() ) { case I: - final byte[] insertionBases = Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength ); + final byte[] insertionBases = Arrays.copyOfRange( alignment, alignmentPos - 1, alignmentPos + elementLength ); // add padding base boolean allN = true; - for( final byte b : insertionBases ) { - if( b != (byte) 'N' ) { + for( int i = 1; i < insertionBases.length; i++ ) { // check all bases except for the padding base + if( insertionBases[i] != (byte) 'N' ) { allN = false; break; } @@ -544,14 +545,13 @@ public class GenotypingEngine { if( !allN ) { final ArrayList insertionAlleles = new ArrayList(); final int insertionStart = refLoc.getStart() + refPos - 1; + insertionAlleles.add( Allele.create(ref[refPos-1], true) ); if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) { - insertionAlleles.add( Allele.create(ref[refPos-1], true) ); insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE ); vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make()); } else { - insertionAlleles.add( Allele.create(Allele.NULL_ALLELE_STRING, true) ); insertionAlleles.add( Allele.create(insertionBases, false) ); - vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).referenceBaseForIndel(ref[refPos-1]).make()); + vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make()); } } @@ -561,7 +561,7 @@ public class GenotypingEngine { alignmentPos += elementLength; break; case D: - final byte[] deletionBases = Arrays.copyOfRange( ref, refPos, refPos + elementLength ); + final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base final ArrayList deletionAlleles = new ArrayList(); final int deletionStart = refLoc.getStart() + refPos - 1; // BUGBUG: how often does this symbolic deletion allele case happen? @@ -572,8 +572,8 @@ public class GenotypingEngine { // vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart, deletionAlleles).make()); //} else { deletionAlleles.add( Allele.create(deletionBases, true) ); - deletionAlleles.add( Allele.create(Allele.NULL_ALLELE_STRING, false) ); - vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).referenceBaseForIndel(ref[refPos-1]).make()); + deletionAlleles.add( Allele.create(ref[refPos-1], false) ); + vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).make()); //} refPos += elementLength; break; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java index 8d1ffa0a0..38f11cc5d 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java @@ -29,6 +29,7 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -42,6 +43,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; @@ -80,6 +82,7 @@ import java.util.*; * * */ +@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} ) @Reference(window=@Window(start=-HaplotypeResolver.ACTIVE_WINDOW,stop= HaplotypeResolver.ACTIVE_WINDOW)) public class HaplotypeResolver extends RodWalker { diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java index c97c4ed28..74abb6b11 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java @@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import net.sf.samtools.SAMUtils; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.MathUtils; @@ -290,20 +289,22 @@ public class PoolGenotypeLikelihoodsUnitTest { } - @Test + // TODO -- Guillermo, this test cannot work because the ArtificialReadPileupTestProvider returns a position of chr1:5, which is less than + // TODO -- HAPLOTYPE_SIZE in IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles() so the HaplotypeMap is not populated. + @Test (enabled = false) public void testIndelErrorModel() { final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref"); final byte refByte = refPileupTestProvider.getRefByte(); - final String altBases = "TCA"; + final String altBases = (char)refByte + "TCA"; final String refSampleName = refPileupTestProvider.getSampleNames().get(0); final List trueAlleles = new ArrayList(); - trueAlleles.add(Allele.create(Allele.NULL_ALLELE_STRING, true)); - trueAlleles.add(Allele.create("TC", false)); + trueAlleles.add(Allele.create(refByte, true)); + trueAlleles.add(Allele.create((char)refByte + "TC", false)); final String fw = new String(refPileupTestProvider.getReferenceContext().getForwardBases()); final VariantContext refInsertionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(), refPileupTestProvider.getReferenceContext().getLocus().getStart(), trueAlleles). - genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).referenceBaseForIndel(refByte).make(); + genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).make(); final int[] matchArray = {95, 995, 9995, 10000}; @@ -333,12 +334,12 @@ public class PoolGenotypeLikelihoodsUnitTest { // create deletion VC final int delLength = 4; final List delAlleles = new ArrayList(); - delAlleles.add(Allele.create(fw.substring(1,delLength+1), true)); - delAlleles.add(Allele.create(Allele.NULL_ALLELE_STRING, false)); + delAlleles.add(Allele.create(fw.substring(0,delLength+1), true)); + delAlleles.add(Allele.create(refByte, false)); final VariantContext refDeletionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(), refPileupTestProvider.getReferenceContext().getLocus().getStart()+delLength, delAlleles). - genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).referenceBaseForIndel(refByte).make(); + genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).make(); for (int matches: matchArray) { for (int mismatches: mismatchArray) { @@ -392,9 +393,6 @@ public class PoolGenotypeLikelihoodsUnitTest { final byte refByte = readPileupTestProvider.getRefByte(); final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T'; - final int refIdx = BaseUtils.simpleBaseToBaseIndex(refByte); - final int altIdx = BaseUtils.simpleBaseToBaseIndex(altByte); - final List allAlleles = new ArrayList(); // this contains only ref Allele up to now final Set laneIDs = new TreeSet(); laneIDs.add(GenotypeLikelihoodsCalculationModel.DUMMY_LANE); @@ -411,11 +409,17 @@ public class PoolGenotypeLikelihoodsUnitTest { for (String laneID : laneIDs) noisyErrorModels.put(laneID, Q30ErrorModel); + final int refIdx = 0; + int altIdx = 2; + + // ref allele must be first + allAlleles.add(Allele.create(refByte, true)); for (byte b: BaseUtils.BASES) { - if (refByte == b) - allAlleles.add(Allele.create(b,true)); - else + if (refByte != b) { + if (b == altByte) + altIdx = allAlleles.size(); allAlleles.add(Allele.create(b, false)); + } } PrintStream out = null; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java index 04bb3a753..539190fe9 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java @@ -262,8 +262,6 @@ public class GenotypingEngineUnitTest extends BaseTest { Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); - Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel()); - Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel()); // SNP + ref + SNP = MNP with ref base gap thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make(); @@ -274,11 +272,9 @@ public class GenotypingEngineUnitTest extends BaseTest { Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); - Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel()); - Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel()); // insertion + SNP - thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","AAAAA").referenceBaseForIndel("T").make(); + thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TAAAAA").make(); nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make(); truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TAAAAACG").source("merged").make(); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); @@ -286,23 +282,19 @@ public class GenotypingEngineUnitTest extends BaseTest { Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); - Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel()); - Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel()); // SNP + insertion thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make(); - nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("-","AAAAA").referenceBaseForIndel("C").make(); + nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","CAAAAA").make(); truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","GCCAAAAA").source("merged").make(); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); logger.warn(truthVC + " == " + mergedVC); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); - Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel()); - Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel()); // deletion + SNP - thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("C","-").referenceBaseForIndel("T").make(); + thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","T").make(); nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make(); truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TG").source("merged").make(); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); @@ -310,68 +302,66 @@ public class GenotypingEngineUnitTest extends BaseTest { Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); - Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel()); - Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel()); // SNP + deletion thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make(); - nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make(); + nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make(); truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","GCC").source("merged").make(); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); logger.warn(truthVC + " == " + mergedVC); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); - Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel()); - Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel()); // insertion + deletion = MNP - thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","A").referenceBaseForIndel("T").make(); - nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make(); + thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TA").make(); + nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make(); truthVC = new VariantContextBuilder().loc("2", 1704, 1706).alleles("CCG","ACC").source("merged").make(); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); logger.warn(truthVC + " == " + mergedVC); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); - Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel()); - Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel()); // insertion + deletion - thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","AAAAA").referenceBaseForIndel("T").make(); - nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make(); + thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TAAAAA").make(); + nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make(); truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","TAAAAACC").source("merged").make(); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); logger.warn(truthVC + " == " + mergedVC); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); - Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel()); - Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel()); // insertion + insertion - thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","A").referenceBaseForIndel("T").make(); - nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("-","A").referenceBaseForIndel("C").make(); + thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TA").make(); + nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","CA").make(); truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TACCA").source("merged").make(); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); logger.warn(truthVC + " == " + mergedVC); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); - Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel()); - Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel()); // deletion + deletion - thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("T","-").referenceBaseForIndel("A").make(); - nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make(); + thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make(); + nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make(); truthVC = new VariantContextBuilder().loc("2", 1701, 1706).alleles("ATTCCG","ATCC").source("merged").make(); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); logger.warn(truthVC + " == " + mergedVC); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); - Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel()); - Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel()); + + // deletion + insertion (abutting) + thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make(); + nextVC = new VariantContextBuilder().loc("2", 1702, 1702).alleles("T","GCGCGC").make(); + truthVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","AGCGCGC").source("merged").make(); + mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); + logger.warn(truthVC + " == " + mergedVC); + Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); + Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); + Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); // complex + complex thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","AAA").make(); @@ -382,8 +372,6 @@ public class GenotypingEngineUnitTest extends BaseTest { Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); - Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel()); - Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel()); } /** diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index a87703423..9b149e8d1 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -30,7 +30,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { - HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "ff370c42c8b09a29f1aeff5ac57c7ea6"); + HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "d8317f4589e8e0c48bcd087cdb75ce88"); } private void HCTestComplexVariants(String bam, String args, String md5) { @@ -43,6 +43,5 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void testHaplotypeCallerMultiSampleComplex() { HCTestComplexVariants(CEUTRIO_BAM, "", "6f9fda3ea82c5696bed1d48ee90cd76b"); } - } diff --git a/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidation.java b/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidation.java index 7af83d14b..e8eea5ff0 100644 --- a/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidation.java +++ b/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidation.java @@ -29,11 +29,13 @@ import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; import org.broadinstitute.sting.alignment.bwa.BWTFiles; import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.Iterator; @@ -46,6 +48,7 @@ import java.util.Iterator; * @author mhanna * @version 0.1 */ +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) public class AlignmentValidation extends ReadWalker { /** * The supporting BWT index generated using BWT. diff --git a/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java b/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java index c8554573b..6206fc2ce 100644 --- a/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java +++ b/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java @@ -34,11 +34,13 @@ import org.broadinstitute.sting.alignment.bwa.BWTFiles; import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.WalkerName; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.io.File; @@ -50,6 +52,7 @@ import java.io.File; * @author mhanna * @version 0.1 */ +@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) @WalkerName("Align") public class AlignmentWalker extends ReadWalker { @Argument(fullName="target_reference",shortName="target_ref",doc="The reference to which reads in the source file should be aligned. Alongside this reference should sit index files " + diff --git a/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignments.java b/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignments.java index b0580fe50..336c95d42 100644 --- a/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignments.java +++ b/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignments.java @@ -30,9 +30,11 @@ import org.broadinstitute.sting.alignment.bwa.BWTFiles; import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.io.PrintStream; @@ -48,6 +50,7 @@ import java.util.TreeMap; * @author mhanna * @version 0.1 */ +@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) public class CountBestAlignments extends ReadWalker { /** * The supporting BWT index generated using BWT. diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java index b1ad19e69..306ebdd0e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java @@ -131,6 +131,12 @@ public class CommandLineGATK extends CommandLineExecutable { // can't close tribble index when writing if ( message.indexOf("Unable to close index for") != -1 ) exitSystemWithUserError(new UserException(t.getCause().getMessage())); + + // disk is full + if ( message.indexOf("No space left on device") != -1 ) + exitSystemWithUserError(new UserException(t.getMessage())); + if ( t.getCause().getMessage().indexOf("No space left on device") != -1 ) + exitSystemWithUserError(new UserException(t.getCause().getMessage())); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/examples/CoverageBySample.java b/public/java/src/org/broadinstitute/sting/gatk/examples/CoverageBySample.java index 5dbd90405..6780311bb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/examples/CoverageBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/examples/CoverageBySample.java @@ -3,10 +3,12 @@ package org.broadinstitute.sting.gatk.examples; import net.sf.samtools.SAMReadGroupRecord; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -17,8 +19,9 @@ import java.util.List; import java.util.Map; /** - * Computes the coverage per sample. + * Computes the coverage per sample for every position (use with -L argument!). */ +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) public class CoverageBySample extends LocusWalker { @Output protected PrintStream out; diff --git a/public/java/src/org/broadinstitute/sting/gatk/examples/GATKPaperGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKPaperGenotyper.java index 3069ee528..6482354a9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/examples/GATKPaperGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKPaperGenotyper.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.examples; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -35,6 +36,7 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.io.PrintStream; @@ -46,6 +48,7 @@ import java.io.PrintStream; * * @author aaron */ +@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} ) public class GATKPaperGenotyper extends LocusWalker implements TreeReducible { // the possible diploid genotype strings private static enum GENOTYPE { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT } diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index fe069c2d9..1b75a2c70 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -163,43 +163,58 @@ public class VariantContextAdaptors { @Override public VariantContext convert(String name, Object input, ReferenceContext ref) { OldDbSNPFeature dbsnp = (OldDbSNPFeature)input; - if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) ) - return null; - Allele refAllele = Allele.create(dbsnp.getNCBIRefBase(), true); - if ( isSNP(dbsnp) || isIndel(dbsnp) || isMNP(dbsnp) || dbsnp.getVariantType().contains("mixed") ) { - // add the reference allele - List alleles = new ArrayList(); - alleles.add(refAllele); + int index = dbsnp.getStart() - ref.getWindow().getStart() - 1; + if ( index < 0 ) + return null; // we weren't given enough reference context to create the VariantContext - // add all of the alt alleles - boolean sawNullAllele = refAllele.isNull(); - for ( String alt : getAlternateAlleleList(dbsnp) ) { - if ( ! Allele.acceptableAlleleBases(alt) ) { - //System.out.printf("Excluding dbsnp record %s%n", dbsnp); - return null; - } - Allele altAllele = Allele.create(alt, false); - alleles.add(altAllele); - if ( altAllele.isNull() ) - sawNullAllele = true; - } + final byte refBaseForIndel = ref.getBases()[index]; - Map attributes = new HashMap(); - - int index = dbsnp.getStart() - ref.getWindow().getStart() - 1; - if ( index < 0 ) - return null; // we weren't given enough reference context to create the VariantContext - Byte refBaseForIndel = new Byte(ref.getBases()[index]); - - final VariantContextBuilder builder = new VariantContextBuilder(); - builder.source(name).id(dbsnp.getRsID()); - builder.loc(dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0), dbsnp.getEnd() - (refAllele.isNull() ? 1 : 0)); - builder.alleles(alleles); - builder.referenceBaseForIndel(refBaseForIndel); - return builder.make(); - } else + boolean addPaddingBase; + if ( isSNP(dbsnp) || isMNP(dbsnp) ) + addPaddingBase = false; + else if ( isIndel(dbsnp) || dbsnp.getVariantType().contains("mixed") ) + addPaddingBase = VariantContextUtils.requiresPaddingBase(stripNullDashes(getAlleleList(dbsnp))); + else return null; // can't handle anything else + + Allele refAllele; + if ( dbsnp.getNCBIRefBase().equals("-") ) + refAllele = Allele.create(refBaseForIndel, true); + else if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) ) + return null; + else + refAllele = Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + dbsnp.getNCBIRefBase(), true); + + final List alleles = new ArrayList(); + alleles.add(refAllele); + + // add all of the alt alleles + for ( String alt : getAlternateAlleleList(dbsnp) ) { + if ( Allele.wouldBeNullAllele(alt.getBytes())) + alt = ""; + else if ( ! Allele.acceptableAlleleBases(alt) ) + return null; + + alleles.add(Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + alt, false)); + } + + final VariantContextBuilder builder = new VariantContextBuilder(); + builder.source(name).id(dbsnp.getRsID()); + builder.loc(dbsnp.getChr(), dbsnp.getStart() - (addPaddingBase ? 1 : 0), dbsnp.getEnd() - (addPaddingBase && refAllele.length() == 1 ? 1 : 0)); + builder.alleles(alleles); + return builder.make(); + } + + private static List stripNullDashes(final List alleles) { + final List newAlleles = new ArrayList(alleles.size()); + for ( final String allele : alleles ) { + if ( allele.equals("-") ) + newAlleles.add(""); + else + newAlleles.add(allele); + } + return newAlleles; } } @@ -351,7 +366,7 @@ public class VariantContextAdaptors { long end = hapmap.getEnd(); if ( deletionLength > 0 ) end += deletionLength; - VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).referenceBaseForIndel(refBaseForIndel).make(); + VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).make(); return vc; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index 523aa81b1..a9edab752 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -42,10 +42,6 @@ import java.util.List; */ public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation { - private static final String REF_ALLELE = "REF"; - - private static final String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time - public void annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, GenotypeBuilder gb) { if ( g == null || !g.isCalled() ) return; @@ -53,10 +49,10 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa if ( vc.isSNP() ) annotateSNP(stratifiedContext, vc, gb); else if ( vc.isIndel() ) - annotateIndel(stratifiedContext, vc, gb); + annotateIndel(stratifiedContext, ref.getBase(), vc, gb); } - private void annotateSNP(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) { + private void annotateSNP(final AlignmentContext stratifiedContext, final VariantContext vc, final GenotypeBuilder gb) { HashMap alleleCounts = new HashMap(); for ( Allele allele : vc.getAlleles() ) @@ -77,62 +73,47 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa gb.AD(counts); } - private void annotateIndel(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) { + private void annotateIndel(final AlignmentContext stratifiedContext, final byte refBase, final VariantContext vc, final GenotypeBuilder gb) { ReadBackedPileup pileup = stratifiedContext.getBasePileup(); if ( pileup == null ) return; - final HashMap alleleCounts = new HashMap(); - alleleCounts.put(REF_ALLELE, 0); + final HashMap alleleCounts = new HashMap(); final Allele refAllele = vc.getReference(); - for ( Allele allele : vc.getAlternateAlleles() ) { - - if ( allele.isNoCall() ) { - continue; // this does not look so good, should we die??? - } - - alleleCounts.put(getAlleleRepresentation(allele), 0); + for ( final Allele allele : vc.getAlleles() ) { + alleleCounts.put(allele, 0); } for ( PileupElement p : pileup ) { if ( p.isBeforeInsertion() ) { - final String b = p.getEventBases(); - if ( alleleCounts.containsKey(b) ) { - alleleCounts.put(b, alleleCounts.get(b)+1); + final Allele insertion = Allele.create((char)refBase + p.getEventBases(), false); + if ( alleleCounts.containsKey(insertion) ) { + alleleCounts.put(insertion, alleleCounts.get(insertion)+1); } } else if ( p.isBeforeDeletionStart() ) { - if ( p.getEventLength() == refAllele.length() ) { - // this is indeed the deletion allele recorded in VC - final String b = DEL; - if ( alleleCounts.containsKey(b) ) { - alleleCounts.put(b, alleleCounts.get(b)+1); - } + if ( p.getEventLength() == refAllele.length() - 1 ) { + // this is indeed the deletion allele recorded in VC + final Allele deletion = Allele.create(refBase); + if ( alleleCounts.containsKey(deletion) ) { + alleleCounts.put(deletion, alleleCounts.get(deletion)+1); } + } } else if ( p.getRead().getAlignmentEnd() > vc.getStart() ) { - alleleCounts.put(REF_ALLELE, alleleCounts.get(REF_ALLELE)+1); + alleleCounts.put(refAllele, alleleCounts.get(refAllele)+1); } } - int[] counts = new int[alleleCounts.size()]; - counts[0] = alleleCounts.get(REF_ALLELE); + final int[] counts = new int[alleleCounts.size()]; + counts[0] = alleleCounts.get(refAllele); for (int i = 0; i < vc.getAlternateAlleles().size(); i++) - counts[i+1] = alleleCounts.get( getAlleleRepresentation(vc.getAlternateAllele(i)) ); + counts[i+1] = alleleCounts.get( vc.getAlternateAllele(i) ); gb.AD(counts); } - private String getAlleleRepresentation(Allele allele) { - if ( allele.isNull() ) { // deletion wrt the ref - return DEL; - } else { // insertion, pass actual bases - return allele.getBaseString(); - } - - } - // public String getIndelBases() public List getKeyNames() { return Arrays.asList(VCFConstants.GENOTYPE_ALLELE_DEPTHS); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java index 0aa05cf89..9eb0e4dda 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java @@ -250,8 +250,6 @@ public class BeagleOutputToVCF extends RodWalker { // Beagle always produces genotype strings based on the strings we input in the likelihood file. String refString = vc_input.getReference().getDisplayString(); - if (refString.length() == 0) // ref was null - refString = Allele.NULL_ALLELE_STRING; Allele bglAlleleA, bglAlleleB; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java index 0224d76a4..fdc333676 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java @@ -239,7 +239,7 @@ public class ProduceBeagleInput extends RodWalker { if ( markers != null ) markers.append(marker).append("\t").append(Integer.toString(markerCounter++)).append("\t"); for ( Allele allele : preferredVC.getAlleles() ) { String bglPrintString; - if (allele.isNoCall() || allele.isNull()) + if (allele.isNoCall()) bglPrintString = "-"; else bglPrintString = allele.getBaseString(); // get rid of * in case of reference allele diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphased.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphased.java index e183a95d8..a6a6d484e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphased.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphased.java @@ -149,7 +149,7 @@ public class VariantsToBeagleUnphased extends RodWalker { // write out the alleles at this site for ( Allele allele : vc.getAlleles() ) { - beagleOut.append(allele.isNoCall() || allele.isNull() ? "-" : allele.getBaseString()).append(" "); + beagleOut.append(allele.isNoCall() ? "-" : allele.getBaseString()).append(" "); } // write out sample level genotypes diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java index 369731530..112eb278e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.targets; import net.sf.picard.util.PeekableIterator; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -36,6 +37,7 @@ import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; @@ -80,6 +82,7 @@ import java.util.*; * @author Mauricio Carneiro, Roger Zurawicki * @since 5/8/12 */ +@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) @By(value = DataSource.READS) @PartitionBy(PartitionType.INTERVAL) public class DiagnoseTargets extends LocusWalker { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java index 8c4cab877..c964b0b4b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.targets; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -33,9 +34,11 @@ import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker; import org.broadinstitute.sting.gatk.walkers.PartitionBy; import org.broadinstitute.sting.gatk.walkers.PartitionType; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.io.PrintStream; +@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) @PartitionBy(PartitionType.CONTIG) @ActiveRegionExtension(extension = 0, maxRegion = 50000) public class FindCoveredIntervals extends ActiveRegionWalker { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java index 9c9a75fc4..92549b821 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java @@ -107,11 +107,11 @@ public class FastaAlternateReference extends FastaReference { continue; if ( vc.isSimpleDeletion()) { - deletionBasesRemaining = vc.getReference().length(); + deletionBasesRemaining = vc.getReference().length() - 1; // delete the next n bases, not this one return new Pair(context.getLocation(), refBase); } else if ( vc.isSimpleInsertion()) { - return new Pair(context.getLocation(), refBase.concat(vc.getAlternateAllele(0).toString())); + return new Pair(context.getLocation(), vc.getAlternateAllele(0).toString()); } else if (vc.isSNP()) { return new Pair(context.getLocation(), vc.getAlternateAllele(0).toString()); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStats.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStats.java index 8a1af623e..6beade070 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStats.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStats.java @@ -26,17 +26,20 @@ package org.broadinstitute.sting.gatk.walkers.fasta; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RefWalker; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.io.PrintStream; /** * Calculates basic statistics about the reference sequence itself */ +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) public class FastaStats extends RefWalker { @Output PrintStream out; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java index cef09a913..869e52216 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java @@ -246,18 +246,19 @@ public class ConsensusAlleleCounter { // get ref bases of accurate deletion final int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart(); stop = loc.getStart() + dLen; - final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen); + final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference - 1, startIdxInReference + dLen); // add reference padding if (Allele.acceptableAlleleBases(refBases, false)) { refAllele = Allele.create(refBases, true); - altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false); + altAllele = Allele.create(ref.getBase(), false); } else continue; // don't go on with this allele if refBases are non-standard } else { // insertion case - if (Allele.acceptableAlleleBases(s, false)) { // don't allow N's in insertions - refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true); - altAllele = Allele.create(s, false); + final String insertionBases = (char)ref.getBase() + s; // add reference padding + if (Allele.acceptableAlleleBases(insertionBases, false)) { // don't allow N's in insertions + refAllele = Allele.create(ref.getBase(), true); + altAllele = Allele.create(insertionBases, false); stop = loc.getStart(); } else continue; // go on to next allele if consensus insertion has any non-standard base. @@ -267,7 +268,6 @@ public class ConsensusAlleleCounter { final VariantContextBuilder builder = new VariantContextBuilder().source(""); builder.loc(loc.getContig(), loc.getStart(), stop); builder.alleles(Arrays.asList(refAllele, altAllele)); - builder.referenceBaseForIndel(ref.getBase()); builder.noGenotypes(); if (doMultiAllelicCalls) { vcs.add(builder.make()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 230d6c324..bedffa690 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -35,7 +35,6 @@ import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Haplotype; -import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.variantcontext.*; @@ -48,8 +47,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood private boolean DEBUG = false; private boolean ignoreSNPAllelesWhenGenotypingIndels = false; private PairHMMIndelErrorModel pairModel; - private boolean allelesArePadded; - + private static ThreadLocal>> indelLikelihoodMap = new ThreadLocal>>() { protected synchronized HashMap> initialValue() { @@ -105,25 +103,21 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood indelLikelihoodMap.set(new HashMap>()); haplotypeMap.clear(); - Pair,Boolean> pair = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels); - alleleList = pair.first; - allelesArePadded = pair.second; + alleleList = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels); if (alleleList.isEmpty()) return null; } - getHaplotypeMapFromAlleles(alleleList, ref, loc, haplotypeMap); // will update haplotypeMap adding elements if (haplotypeMap == null || haplotypeMap.isEmpty()) return null; // start making the VariantContext // For all non-snp VC types, VC end location is just startLocation + length of ref allele including padding base. - - final int endLoc = computeEndLocation(alleleList, loc,allelesArePadded); + final int endLoc = loc.getStart() + alleleList.get(0).length() - 1; final int eventLength = getEventLength(alleleList); - final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList).referenceBaseForIndel(ref.getBase()); + final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList); // create the genotypes; no-call everyone for now GenotypesContext genotypes = GenotypesContext.create(); @@ -160,15 +154,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood return indelLikelihoodMap.get(); } - public static int computeEndLocation(final List alleles, final GenomeLoc loc, final boolean allelesArePadded) { - Allele refAllele = alleles.get(0); - int endLoc = loc.getStart() + refAllele.length()-1; - if (allelesArePadded) - endLoc++; - - return endLoc; - } - public static void getHaplotypeMapFromAlleles(final List alleleList, final ReferenceContext ref, final GenomeLoc loc, @@ -213,16 +198,15 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood } - public static Pair,Boolean> getInitialAlleleList(final RefMetaDataTracker tracker, + public static List getInitialAlleleList(final RefMetaDataTracker tracker, final ReferenceContext ref, final Map contexts, final AlignmentContextUtils.ReadOrientation contextType, final GenomeLocParser locParser, final UnifiedArgumentCollection UAC, final boolean ignoreSNPAllelesWhenGenotypingIndels) { - + List alleles = new ArrayList(); - boolean allelesArePadded = true; if (UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) { VariantContext vc = null; for (final VariantContext vc_input : tracker.getValues(UAC.alleles, ref.getLocus())) { @@ -235,7 +219,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood } // ignore places where we don't have a variant if (vc == null) - return new Pair,Boolean>(alleles,false); + return alleles; if (ignoreSNPAllelesWhenGenotypingIndels) { // if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore it and don't genotype it @@ -248,15 +232,11 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood } else { alleles.addAll(vc.getAlleles()); } - if ( vc.getReference().getBases().length == vc.getEnd()-vc.getStart()+1) - allelesArePadded = false; - - } else { - alleles = IndelGenotypeLikelihoodsCalculationModel.computeConsensusAlleles(ref, contexts, contextType, locParser, UAC); + alleles = computeConsensusAlleles(ref, contexts, contextType, locParser, UAC); } - return new Pair,Boolean> (alleles,allelesArePadded); + return alleles; } // Overload function in GenotypeLikelihoodsCalculationModel so that, for an indel case, we consider a deletion as part of the pileup, diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 32564984a..f73ab2471 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -37,7 +37,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.codecs.vcf.VCFAlleleClipper; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -283,7 +282,7 @@ public class UnifiedGenotyperEngine { VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, rawContext.getLocation(), false, logger, UAC.alleles); if ( vcInput == null ) return null; - vc = new VariantContextBuilder("UG_call", ref.getLocus().getContig(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles()).referenceBaseForIndel(vcInput.getReferenceBaseForIndel()).make(); + vc = new VariantContextBuilder("UG_call", ref.getLocus().getContig(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles()).make(); } else { // deal with bad/non-standard reference bases if ( !Allele.acceptableAlleleBases(new byte[]{ref.getBase()}) ) @@ -408,11 +407,6 @@ public class UnifiedGenotyperEngine { builder.log10PError(phredScaledConfidence/-10.0); if ( ! passesCallThreshold(phredScaledConfidence) ) builder.filters(filter); - if ( limitedContext ) { - builder.referenceBaseForIndel(vc.getReferenceBaseForIndel()); - } else { - builder.referenceBaseForIndel(refContext.getBase()); - } // create the genotypes final GenotypesContext genotypes = afcm.get().subsetAlleles(vc, myAlleles, true,ploidy); @@ -493,8 +487,8 @@ public class UnifiedGenotyperEngine { // if we are subsetting alleles (either because there were too many or because some were not polymorphic) // then we may need to trim the alleles (because the original VariantContext may have had to pad at the end). - if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // TODO - this function doesn't work with mixed records or records that started as mixed and then became non-mixed - vcCall = VCFAlleleClipper.reverseTrimAlleles(vcCall); + if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) + vcCall = VariantContextUtils.reverseTrimAlleles(vcCall); if ( annotationEngine != null && !limitedContext ) { // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 2153525ab..5e0f15e6a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -872,7 +872,13 @@ public class IndelRealigner extends ReadWalker { for ( VariantContext knownIndel : knownIndelsToTry ) { if ( knownIndel == null || !knownIndel.isIndel() || knownIndel.isComplexIndel() ) continue; - byte[] indelStr = knownIndel.isSimpleInsertion() ? knownIndel.getAlternateAllele(0).getBases() : Utils.dupBytes((byte)'-', knownIndel.getReference().length()); + final byte[] indelStr; + if ( knownIndel.isSimpleInsertion() ) { + final byte[] fullAllele = knownIndel.getAlternateAllele(0).getBases(); + indelStr = Arrays.copyOfRange(fullAllele, 1, fullAllele.length); // remove ref padding + } else { + indelStr = Utils.dupBytes((byte)'-', knownIndel.getReference().length() - 1); + } int start = knownIndel.getStart() - leftmostIndex + 1; Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel); if ( c != null ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java index 0fd047f58..84a65b9b2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java @@ -1131,12 +1131,13 @@ public class SomaticIndelDetector extends ReadWalker { List alleles = new ArrayList(2); // actual observed (distinct!) alleles at the site List homref_alleles = null; // when needed, will contain two identical copies of ref allele - needed to generate hom-ref genotype + final byte referencePaddingBase = refBases[(int)start-1]; if ( call.getVariant() == null ) { - // we will need to cteate genotype with two (hom) ref alleles (below). + // we will need to create genotype with two (hom) ref alleles (below). // we can not use 'alleles' list here, since that list is supposed to contain // only *distinct* alleles observed at the site or VCFContext will frown upon us... - alleles.add( Allele.create(refBases[(int)start-1],true) ); + alleles.add( Allele.create(referencePaddingBase,true) ); homref_alleles = new ArrayList(2); homref_alleles.add( alleles.get(0)); homref_alleles.add( alleles.get(0)); @@ -1145,7 +1146,7 @@ public class SomaticIndelDetector extends ReadWalker { // (Genotype will tell us whether it is an actual call or not!) int event_length = call.getVariant().lengthOnRef(); if ( event_length < 0 ) event_length = 0; - fillAlleleList(alleles,call); + fillAlleleList(alleles,call,referencePaddingBase); stop += event_length; } @@ -1165,7 +1166,7 @@ public class SomaticIndelDetector extends ReadWalker { filters.add("NoCall"); } VariantContext vc = new VariantContextBuilder("IGv2_Indel_call", refName, start, stop, alleles) - .genotypes(genotypes).filters(filters).referenceBaseForIndel(refBases[(int)start-1]).make(); + .genotypes(genotypes).filters(filters).make(); vcf.add(vc); } @@ -1175,16 +1176,16 @@ public class SomaticIndelDetector extends ReadWalker { * @param l * @param call */ - private void fillAlleleList(List l, IndelPrecall call) { + private void fillAlleleList(List l, IndelPrecall call, byte referencePaddingBase) { int event_length = call.getVariant().lengthOnRef(); if ( event_length == 0 ) { // insertion - l.add( Allele.create(Allele.NULL_ALLELE_STRING,true) ); - l.add( Allele.create(call.getVariant().getBases(), false )); + l.add( Allele.create(referencePaddingBase,true) ); + l.add( Allele.create(referencePaddingBase + call.getVariant().getBases(), false )); } else { //deletion: - l.add( Allele.create(call.getVariant().getBases(), true )); - l.add( Allele.create(Allele.NULL_ALLELE_STRING,false) ); + l.add( Allele.create(referencePaddingBase + call.getVariant().getBases(), true )); + l.add( Allele.create(referencePaddingBase,false) ); } } @@ -1218,19 +1219,20 @@ public class SomaticIndelDetector extends ReadWalker { // } boolean homRefT = ( tCall.getVariant() == null ); boolean homRefN = ( nCall.getVariant() == null ); + final byte referencePaddingBase = refBases[(int)start-1]; if ( tCall.getVariant() == null && nCall.getVariant() == null) { // no indel at all ; create base-representation ref/ref alleles for genotype construction - alleles.add( Allele.create(refBases[(int)start-1],true) ); + alleles.add( Allele.create(referencePaddingBase,true) ); } else { // we got indel(s) int event_length = 0; if ( tCall.getVariant() != null ) { // indel in tumor event_length = tCall.getVariant().lengthOnRef(); - fillAlleleList(alleles, tCall); + fillAlleleList(alleles, tCall, referencePaddingBase); } else { event_length = nCall.getVariant().lengthOnRef(); - fillAlleleList(alleles, nCall); + fillAlleleList(alleles, nCall, referencePaddingBase); } if ( event_length > 0 ) stop += event_length; } @@ -1262,7 +1264,7 @@ public class SomaticIndelDetector extends ReadWalker { } VariantContext vc = new VariantContextBuilder("IGv2_Indel_call", refName, start, stop, alleles) - .genotypes(genotypes).filters(filters).attributes(attrs).referenceBaseForIndel(refBases[(int)start-1]).make(); + .genotypes(genotypes).filters(filters).attributes(attrs).make(); vcf.add(vc); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java index 67ecea1f0..9915d617e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java @@ -32,6 +32,7 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -42,6 +43,7 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.io.PrintStream; import java.util.*; @@ -70,6 +72,7 @@ import java.util.*; * * */ +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) public class CountRODs extends RodWalker, Long>> implements TreeReducible, Long>> { @Output public PrintStream out; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java index 9676704c2..9d96dedef 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java @@ -25,6 +25,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; import java.io.PrintStream; import java.util.ArrayList; +import java.util.Arrays; import java.util.LinkedList; import java.util.List; @@ -262,20 +263,33 @@ public class ValidationAmplicons extends RodWalker { sequenceInvalid = true; invReason.add("SITE_IS_FILTERED"); } + + String refString = validate.getReference().getDisplayString(); + String altString = validate.getAlternateAllele(0).getDisplayString(); + if ( validate.isIndel() ) { sequence.append(Character.toUpperCase((char)ref.getBase())); rawSequence.append(Character.toUpperCase((char)ref.getBase())); + final byte[] refAllele = validate.getReference().getBases(); + refString = new String(Arrays.copyOfRange(refAllele, 1, refAllele.length)); + if ( refString.isEmpty() ) + refString = "-"; + final byte[] altAllele = validate.getAlternateAllele(0).getBases(); + altString = new String(Arrays.copyOfRange(altAllele, 1, altAllele.length)); + if ( altString.isEmpty() ) + altString = "-"; } + sequence.append('['); - sequence.append(validate.getAlternateAllele(0).toString()); + sequence.append(altString); sequence.append('/'); - sequence.append(validate.getReference().toString()); + sequence.append(refString); sequence.append(']'); // do this to the raw sequence to -- the indeces will line up that way rawSequence.append('['); - rawSequence.append(validate.getAlternateAllele(0).getBaseString()); + rawSequence.append(altString); rawSequence.append('/'); - rawSequence.append(validate.getReference().getBaseString()); + rawSequence.append(refString); rawSequence.append(']'); allelePos = ref.getLocus(); if ( indelCounter > 0 ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GenomeEvent.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GenomeEvent.java index af6a52002..67ddc47ff 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GenomeEvent.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GenomeEvent.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -40,14 +39,11 @@ public class GenomeEvent implements Comparable { final protected GenomeLoc loc; /** A set of the alleles segregating in this context */ final protected List alleles; - final protected Byte refBase; // final protected HashMap attributes; - public GenomeEvent(GenomeLocParser parser, final String contig, final int start, final int stop, final List alleles, HashMap attributes, - byte base) { + public GenomeEvent(GenomeLocParser parser, final String contig, final int start, final int stop, final List alleles, HashMap attributes) { this.loc = parser.createGenomeLoc(contig, start, stop); this.alleles = alleles; - this.refBase = base; // this.attributes = attributes; } @@ -68,7 +64,7 @@ public class GenomeEvent implements Comparable { public VariantContext createVariantContextFromEvent() { return new VariantContextBuilder("event", loc.getContig(), loc.getStart(), loc.getStop(), alleles) - .log10PError(0.0).referenceBaseForIndel(refBase).make(); + .log10PError(0.0).make(); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/KeepAFSpectrumFrequencySelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/KeepAFSpectrumFrequencySelector.java index 4b68eed2e..7c1d63f02 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/KeepAFSpectrumFrequencySelector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/KeepAFSpectrumFrequencySelector.java @@ -115,7 +115,7 @@ public class KeepAFSpectrumFrequencySelector extends FrequencyModeSelector { // create bare-bones event and log in corresponding bin // attributes contains AC,AF,AN pulled from original vc, and we keep them here and log in output file for bookkeeping purposes - GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes, vc.getReferenceBaseForIndel()); + GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes); binnedEventArray[binIndex].add(event); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/UniformSamplingFrequencySelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/UniformSamplingFrequencySelector.java index eda75d647..4019c5631 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/UniformSamplingFrequencySelector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/UniformSamplingFrequencySelector.java @@ -65,7 +65,7 @@ public class UniformSamplingFrequencySelector extends FrequencyModeSelector { } // create bare-bones event and log in corresponding bin // attributes contains AC,AF,AN pulled from original vc, and we keep them here and log in output file for bookkeeping purposes - GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes, vc.getReferenceBaseForIndel()); + GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes); binnedEventArray.add(event); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java index 88bf3aef9..a509294ff 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java @@ -56,7 +56,7 @@ public class ThetaVariantEvaluator extends VariantEvaluator { //increment stats for pairwise mismatches for (Allele allele : genotype.getAlleles()) { - if (allele.isNonNull() && allele.isCalled()) { + if (allele.isCalled()) { String alleleString = allele.toString(); alleleCounts.putIfAbsent(alleleString, 0); alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java index 235eb1d9b..9fe499a03 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java @@ -139,11 +139,11 @@ public class LeftAlignVariants extends RodWalker { final byte[] refSeq = ref.getBases(); // get the indel length - int indelLength; + final int indelLength; if ( vc.isSimpleDeletion() ) - indelLength = vc.getReference().length(); + indelLength = vc.getReference().length() - 1; else - indelLength = vc.getAlternateAllele(0).length(); + indelLength = vc.getAlternateAllele(0).length() - 1; if ( indelLength > 200 ) { writer.add(vc); @@ -151,7 +151,7 @@ public class LeftAlignVariants extends RodWalker { } // create an indel haplotype - int originalIndex = ref.getLocus().getStart() - ref.getWindow().getStart() + 1; + final int originalIndex = ref.getLocus().getStart() - ref.getWindow().getStart() + 1; final byte[] originalIndel = makeHaplotype(vc, refSeq, originalIndex, indelLength); // create a CIGAR string to represent the event @@ -170,11 +170,12 @@ public class LeftAlignVariants extends RodWalker { VariantContext newVC = new VariantContextBuilder(vc).start(vc.getStart()-difference).stop(vc.getEnd()-difference).make(); //System.out.println("Moving record from " + vc.getChr()+":"+vc.getStart() + " to " + vc.getChr()+":"+(vc.getStart()-difference)); - int indelIndex = originalIndex-difference; - byte[] newBases = new byte[indelLength]; - System.arraycopy((vc.isSimpleDeletion() ? refSeq : originalIndel), indelIndex, newBases, 0, indelLength); - Allele newAllele = Allele.create(newBases, vc.isSimpleDeletion()); - newVC = updateAllele(newVC, newAllele, refSeq[indelIndex-1]); + final int indelIndex = originalIndex-difference; + final byte[] newBases = new byte[indelLength + 1]; + newBases[0] = refSeq[indelIndex-1]; + System.arraycopy((vc.isSimpleDeletion() ? refSeq : originalIndel), indelIndex, newBases, 1, indelLength); + final Allele newAllele = Allele.create(newBases, vc.isSimpleDeletion()); + newVC = updateAllele(newVC, newAllele); writer.add(newVC); return 1; @@ -195,7 +196,7 @@ public class LeftAlignVariants extends RodWalker { if ( vc.isSimpleDeletion() ) { indexOfRef += indelLength; } else { - System.arraycopy(vc.getAlternateAllele(0).getBases(), 0, hap, currentPos, indelLength); + System.arraycopy(vc.getAlternateAllele(0).getBases(), 1, hap, currentPos, indelLength); currentPos += indelLength; } @@ -205,14 +206,14 @@ public class LeftAlignVariants extends RodWalker { return hap; } - public static VariantContext updateAllele(VariantContext vc, Allele newAllele, Byte refBaseForIndel) { + public static VariantContext updateAllele(final VariantContext vc, final Allele newAllele) { // create a mapping from original allele to new allele HashMap alleleMap = new HashMap(vc.getAlleles().size()); if ( newAllele.isReference() ) { alleleMap.put(vc.getReference(), newAllele); - alleleMap.put(vc.getAlternateAllele(0), vc.getAlternateAllele(0)); + alleleMap.put(vc.getAlternateAllele(0), Allele.create(newAllele.getBases()[0], false)); } else { - alleleMap.put(vc.getReference(), vc.getReference()); + alleleMap.put(vc.getReference(), Allele.create(newAllele.getBases()[0], true)); alleleMap.put(vc.getAlternateAllele(0), newAllele); } @@ -229,6 +230,6 @@ public class LeftAlignVariants extends RodWalker { newGenotypes.add(new GenotypeBuilder(genotype).alleles(newAlleles).make()); } - return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).referenceBaseForIndel(refBaseForIndel).make(); + return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).make(); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index 0d9a4fc03..63209e98c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -119,7 +119,6 @@ public class LiftoverVariants extends RodWalker { if ( toInterval != null ) { // check whether the strand flips, and if so reverse complement everything - // TODO -- make this work for indels (difficult because the 'previous base' context needed will be changing based on indel type/size) if ( fromInterval.isPositiveStrand() != toInterval.isPositiveStrand() && vc.isPointEvent() ) { vc = VariantContextUtils.reverseComplement(vc); } @@ -132,11 +131,10 @@ public class LiftoverVariants extends RodWalker { .attribute("OriginalStart", fromInterval.getStart()).make(); } - VariantContext newVC = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc); - if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) { + if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(vc) ) { logger.warn(String.format("VCF at %s / %d => %s / %d is switching substitution type %s/%s to %s/%s", - originalVC.getChr(), originalVC.getStart(), newVC.getChr(), newVC.getStart(), - originalVC.getReference(), originalVC.getAlternateAllele(0), newVC.getReference(), newVC.getAlternateAllele(0))); + originalVC.getChr(), originalVC.getStart(), vc.getChr(), vc.getStart(), + originalVC.getReference(), originalVC.getAlternateAllele(0), vc.getReference(), vc.getAlternateAllele(0))); } writer.add(vc); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index 4b793a31e..c92551a73 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -130,35 +130,16 @@ public class ValidateVariants extends RodWalker { return; // get the true reference allele - Allele reportedRefAllele = vc.getReference(); - Allele observedRefAllele = null; - // insertions - if ( vc.isSimpleInsertion() ) { - observedRefAllele = Allele.create(Allele.NULL_ALLELE_STRING); + final Allele reportedRefAllele = vc.getReference(); + final int refLength = reportedRefAllele.length(); + if ( refLength > 100 ) { + logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", refLength, vc.getChr(), vc.getStart())); + return; } - // deletions - else if ( vc.isSimpleDeletion() || vc.isMNP() ) { - // we can't validate arbitrarily long deletions - if ( reportedRefAllele.length() > 100 ) { - logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", reportedRefAllele.length(), vc.getChr(), vc.getStart())); - return; - } - // deletions are associated with the (position of) the last (preceding) non-deleted base; - // hence to get actually deleted bases we need offset = 1 - int offset = vc.isMNP() ? 0 : 1; - byte[] refBytes = ref.getBases(); - byte[] trueRef = new byte[reportedRefAllele.length()]; - for (int i = 0; i < reportedRefAllele.length(); i++) - trueRef[i] = refBytes[i+offset]; - observedRefAllele = Allele.create(trueRef, true); - } - // SNPs, etc. but not mixed types because they are too difficult - else if ( !vc.isMixed() ) { - byte[] refByte = new byte[1]; - refByte[0] = ref.getBase(); - observedRefAllele = Allele.create(refByte, true); - } + final byte[] observedRefBases = new byte[refLength]; + System.arraycopy(ref.getBases(), 0, observedRefBases, 0, refLength); + final Allele observedRefAllele = Allele.create(observedRefBases); // get the RS IDs Set rsIDs = null; @@ -171,10 +152,10 @@ public class ValidateVariants extends RodWalker { try { switch( type ) { case ALL: - vc.extraStrictValidation(observedRefAllele, ref.getBase(), rsIDs); + vc.extraStrictValidation(reportedRefAllele, observedRefAllele, rsIDs); break; case REF: - vc.validateReferenceBases(observedRefAllele, ref.getBase()); + vc.validateReferenceBases(reportedRefAllele, observedRefAllele); break; case IDS: vc.validateRSIDs(rsIDs); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index 844c4d5fb..b73a498bc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -381,7 +381,7 @@ public class VariantsToTable extends RodWalker { getters.put("REF", new Getter() { public String get(VariantContext vc) { StringBuilder x = new StringBuilder(); - x.append(vc.getAlleleStringWithRefPadding(vc.getReference())); + x.append(vc.getReference().getDisplayString()); return x.toString(); } }); @@ -393,7 +393,7 @@ public class VariantsToTable extends RodWalker { for ( int i = 0; i < n; i++ ) { if ( i != 0 ) x.append(","); - x.append(vc.getAlleleStringWithRefPadding(vc.getAlternateAllele(i))); + x.append(vc.getAlternateAllele(i)); } return x.toString(); } @@ -435,11 +435,8 @@ public class VariantsToTable extends RodWalker { private static Object splitAltAlleles(VariantContext vc) { final int numAltAlleles = vc.getAlternateAlleles().size(); if ( numAltAlleles == 1 ) - return vc.getAlleleStringWithRefPadding(vc.getAlternateAllele(0)); + return vc.getAlternateAllele(0); - final List alleles = new ArrayList(numAltAlleles); - for ( Allele allele : vc.getAlternateAlleles() ) - alleles.add(vc.getAlleleStringWithRefPadding(allele)); - return alleles; + return vc.getAlternateAlleles(); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index 787d4d9ab..78c9c4a1c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -103,12 +103,6 @@ public class VariantsToVCF extends RodWalker { @Argument(fullName="sample", shortName="sample", doc="The sample name represented by the variant rod", required=false) protected String sampleName = null; - /** - * This argument is useful for fixing input VCFs with bad reference bases (the output will be a fixed version of the VCF). - */ - @Argument(fullName="fixRef", shortName="fixRef", doc="Fix common reference base in case there's an indel without padding", required=false) - protected boolean fixReferenceBase = false; - private Set allowedGenotypeFormatStrings = new HashSet(); private boolean wroteHeader = false; private Set samples; @@ -140,10 +134,6 @@ public class VariantsToVCF extends RodWalker { builder.genotypes(g); } - if ( fixReferenceBase ) { - builder.referenceBaseForIndel(ref.getBase()); - } - writeRecord(builder.make(), tracker, ref.getLocus()); } @@ -169,8 +159,8 @@ public class VariantsToVCF extends RodWalker { continue; Map alleleMap = new HashMap(2); - alleleMap.put(RawHapMapFeature.DELETION, Allele.create(Allele.NULL_ALLELE_STRING, dbsnpVC.isSimpleInsertion())); - alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion())); + alleleMap.put(RawHapMapFeature.DELETION, Allele.create(ref.getBase(), dbsnpVC.isSimpleInsertion())); + alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(ref.getBase() + ((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion())); hapmap.setActualAlleles(alleleMap); // also, use the correct positioning for insertions diff --git a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java index 393dd5735..0065f9258 100644 --- a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java @@ -431,6 +431,37 @@ public class BaseUtils { return new String(simpleComplement(bases.getBytes())); } + /** + * Returns the uppercased version of the bases + * + * @param bases the bases + * @return the upper cased version + */ + static public byte[] convertToUpperCase(final byte[] bases) { + for ( int i = 0; i < bases.length; i++ ) { + if ( (char)bases[i] >= 'a' ) + bases[i] = toUpperCaseBase(bases[i]); + } + return bases; + } + + static public byte toUpperCaseBase(final byte base) { + switch (base) { + case 'a': + return 'A'; + case 'c': + return 'C'; + case 'g': + return 'G'; + case 't': + return 'T'; + case 'n': + return 'N'; + default: + return base; + } + } + /** * Returns the index of the most common base in the basecounts array. To be used with * pileup.getBaseCounts. diff --git a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java index 829e75682..54442622f 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java @@ -176,7 +176,7 @@ public class Haplotype { newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii]; } } else if( refAllele.length() < altAllele.length() ) { // insertion - final int altAlleleLength = altAllele.length(); + final int altAlleleLength = altAllele.length() - 1; newHaplotype = new byte[bases.length + altAlleleLength]; for( int iii = 0; iii < bases.length; iii++ ) { newHaplotype[iii] = bases[iii]; @@ -185,15 +185,16 @@ public class Haplotype { newHaplotype[iii] = newHaplotype[iii-altAlleleLength]; } for( int iii = 0; iii < altAlleleLength; iii++ ) { - newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii]; + newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii+1]; } } else { // deletion final int shift = refAllele.length() - altAllele.length(); + final int altAlleleLength = altAllele.length() - 1; newHaplotype = new byte[bases.length - shift]; - for( int iii = 0; iii < haplotypeInsertLocation + altAllele.length(); iii++ ) { + for( int iii = 0; iii < haplotypeInsertLocation + altAlleleLength; iii++ ) { newHaplotype[iii] = bases[iii]; } - for( int iii = haplotypeInsertLocation + altAllele.length(); iii < newHaplotype.length; iii++ ) { + for( int iii = haplotypeInsertLocation + altAlleleLength; iii < newHaplotype.length; iii++ ) { newHaplotype[iii] = bases[iii+shift]; } } @@ -204,8 +205,11 @@ public class Haplotype { return new Haplotype(newHaplotype); } - public static LinkedHashMap makeHaplotypeListFromAlleles(List alleleList, int startPos, ReferenceContext ref, - final int haplotypeSize, final int numPrefBases) { + public static LinkedHashMap makeHaplotypeListFromAlleles(final List alleleList, + final int startPos, + final ReferenceContext ref, + final int haplotypeSize, + final int numPrefBases) { LinkedHashMap haplotypeMap = new LinkedHashMap(); @@ -216,7 +220,6 @@ public class Haplotype { refAllele = a; break; } - } if (refAllele == null) @@ -224,19 +227,12 @@ public class Haplotype { byte[] refBases = ref.getBases(); + final int startIdxInReference = 1 + startPos - numPrefBases - ref.getWindow().getStart(); + final String basesBeforeVariant = new String(Arrays.copyOfRange(refBases, startIdxInReference, startIdxInReference + numPrefBases)); - int startIdxInReference = (int)(1+startPos-numPrefBases-ref.getWindow().getStart()); - //int numPrefBases = (int)(vc.getStart()-ref.getWindow().getStart()+1); // indel vc starts one before event - - - byte[] basesBeforeVariant = Arrays.copyOfRange(refBases,startIdxInReference,startIdxInReference+numPrefBases); - int startAfter = startIdxInReference+numPrefBases+ refAllele.getBases().length; // protect against long events that overrun available reference context - if (startAfter > refBases.length) - startAfter = refBases.length; - byte[] basesAfterVariant = Arrays.copyOfRange(refBases, - startAfter, refBases.length); - + final int startAfter = Math.min(startIdxInReference + numPrefBases + refAllele.getBases().length - 1, refBases.length); + final String basesAfterVariant = new String(Arrays.copyOfRange(refBases, startAfter, refBases.length)); // Create location for all haplotypes final int startLoc = ref.getWindow().getStart() + startIdxInReference; @@ -244,16 +240,14 @@ public class Haplotype { final GenomeLoc locus = ref.getGenomeLocParser().createGenomeLoc(ref.getLocus().getContig(),startLoc,stopLoc); - for (final Allele a : alleleList) { - byte[] alleleBases = a.getBases(); + final byte[] alleleBases = a.getBases(); // use string concatenation - String haplotypeString = new String(basesBeforeVariant) + new String(alleleBases) + new String(basesAfterVariant); + String haplotypeString = basesBeforeVariant + new String(Arrays.copyOfRange(alleleBases, 1, alleleBases.length)) + basesAfterVariant; haplotypeString = haplotypeString.substring(0,haplotypeSize); - haplotypeMap.put(a,new Haplotype(haplotypeString.getBytes(), locus)); - + haplotypeMap.put(a,new Haplotype(haplotypeString.getBytes(), locus)); } return haplotypeMap; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java index 0b9654610..0f9cc34e7 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java @@ -305,27 +305,6 @@ public final class BCF2Codec implements FeatureCodec { builder.id(id); } - /** - * Annoying routine that deals with allele clipping from the BCF2 encoding to the standard - * GATK encoding. - * - * @param position - * @param ref - * @param unclippedAlleles - * @return - */ - @Requires({"position > 0", "ref != null && ref.length() > 0", "! unclippedAlleles.isEmpty()"}) - @Ensures("result.size() == unclippedAlleles.size()") - protected List clipAllelesIfNecessary(final int position, - final String ref, - final List unclippedAlleles) { - // the last argument of 1 allows us to safely ignore the end, because we are - // ultimately going to use the end in the record itself - final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(position, ref, unclippedAlleles, 1); - if ( clipped.getError() != null ) error(clipped.getError()); - return clipped.getClippedAlleles(); - } - /** * Decode the alleles from this BCF2 file and put the results in builder * @param builder @@ -353,11 +332,9 @@ public final class BCF2Codec implements FeatureCodec { } assert ref != null; - alleles = clipAllelesIfNecessary(pos, ref, alleles); builder.alleles(alleles); assert ref.length() > 0; - builder.referenceBaseForIndel(ref.getBytes()[0]); return alleles; } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index b3420514b..996cef8a4 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -256,9 +256,20 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec final Map attrs = parseInfo(parts[7]); builder.attributes(attrs); + if ( attrs.containsKey(VCFConstants.END_KEY) ) { + // update stop with the end key if provided + try { + builder.stop(Integer.valueOf(attrs.get(VCFConstants.END_KEY).toString())); + } catch (Exception e) { + generateException("the END value in the INFO field is not valid"); + } + } else { + builder.stop(pos + ref.length() - 1); + } + // get our alleles, filters, and setup an attribute map - final List rawAlleles = parseAlleles(ref, alts, lineNo); - final List alleles = updateBuilderAllelesAndStop(builder, ref, pos, rawAlleles, attrs); + final List alleles = parseAlleles(ref, alts, lineNo); + builder.alleles(alleles); // do we have genotyping data if (parts.length > NUM_STANDARD_FIELDS && includeGenotypes) { @@ -275,7 +286,6 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec VariantContext vc = null; try { - builder.referenceBaseForIndel(ref.getBytes()[0]); vc = builder.make(); } catch (Exception e) { generateException(e.getMessage()); @@ -284,31 +294,6 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec return vc; } - private final List updateBuilderAllelesAndStop(final VariantContextBuilder builder, - final String ref, - final int pos, - final List rawAlleles, - final Map attrs) { - int endForSymbolicAlleles = pos; // by default we use the pos - if ( attrs.containsKey(VCFConstants.END_KEY) ) { - // update stop with the end key if provided - try { - endForSymbolicAlleles = Integer.valueOf(attrs.get(VCFConstants.END_KEY).toString()); - } catch (Exception e) { - generateException("the END value in the INFO field is not valid"); - } - } - - // find out our current location, and clip the alleles down to their minimum length - final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(pos, ref, rawAlleles, endForSymbolicAlleles); - if ( clipped.getError() != null ) - generateException(clipped.getError(), lineNo); - - builder.stop(clipped.getStop()); - builder.alleles(clipped.getClippedAlleles()); - return clipped.getClippedAlleles(); - } - /** * get the name of this codec * @return our set name diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipper.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipper.java deleted file mode 100644 index 40ba23d9d..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipper.java +++ /dev/null @@ -1,434 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.codecs.vcf; - -import com.google.java.contract.Ensures; -import com.google.java.contract.Invariant; -import com.google.java.contract.Requires; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.variantcontext.*; - -import java.util.*; - -/** - * All of the gross allele clipping and padding routines in one place - * - * Having attempted to understand / fix / document this code myself - * I can only conclude that this entire approach needs to be rethought. This - * code just doesn't work robustly with symbolic alleles, with multiple alleles, - * requires a special "reference base for indels" stored in the VariantContext - * whose correctness isn't enforced, and overall has strange special cases - * all over the place. - * - * The reason this code is so complex is due to symbolics and multi-alleleic - * variation, which frequently occur when combining variants from multiple - * VCF files. - * - * TODO rethink this class, make it clean, and make it easy to create, mix, and write out alleles - * TODO this code doesn't work with reverse clipped alleles (ATA / GTTA -> AT / GT) - * - * @author Mark DePristo - * @since 6/12 - */ -public final class VCFAlleleClipper { - private VCFAlleleClipper() { } - - /** - * Determine whether we should clip off the first base of all unclippped alleles or not - * - * Returns true if all of the alleles in unclippedAlleles share a common first base with - * ref0. Ref0 should be the first base of the reference allele UnclippedAlleles may - * contain the reference allele itself, or just the alternate alleles, it doesn't matter. - * - * The algorithm returns true if the first base should be clipped off, or false otherwise - * - * This algorithm works even in the presence of symbolic alleles, logically ignoring these - * values. It - * - * @param unclippedAlleles list of unclipped alleles to assay - * @param ref0 the first base of the reference allele - * @return true if we should clip the first base of unclippedAlleles - */ - @Requires("unclippedAlleles != null") - public static boolean shouldClipFirstBaseP(final List unclippedAlleles, - final byte ref0) { - boolean allSymbolicAlt = true; - - for ( final Allele a : unclippedAlleles ) { - if ( a.isSymbolic() ) { - continue; - } - - // already know we aren't symbolic, so we only need to decide if we have only seen a ref - if ( ! a.isReference() ) - allSymbolicAlt = false; - - if ( a.length() < 1 || (a.getBases()[0] != ref0) ) { - return false; - } - } - - // to reach here all alleles are consistent with clipping the first base matching ref0 - // but we don't clip if all ALT alleles are symbolic - return ! allSymbolicAlt; - } - - public static int computeReverseClipping(final List unclippedAlleles, - final byte[] ref, - final int forwardClipping, - final boolean allowFullClip) { - int clipping = 0; - boolean stillClipping = true; - - while ( stillClipping ) { - for ( final Allele a : unclippedAlleles ) { - if ( a.isSymbolic() ) - continue; - - // we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong - // position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine). - if ( a.length() - clipping == 0 ) - return clipping - (allowFullClip ? 0 : 1); - - if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) { - stillClipping = false; - } - else if ( ref.length == clipping ) { - if ( allowFullClip ) - stillClipping = false; - else - return -1; - } - else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) { - stillClipping = false; - } - } - if ( stillClipping ) - clipping++; - } - - return clipping; - } - - /** - * Are the alleles describing a polymorphism substitution one base for another? - * - * @param alleles a list of alleles, must not be empty - * @return Return true if the length of any allele in alleles isn't 1 - */ - @Requires("!alleles.isEmpty()") - private static boolean isSingleNucleotideEvent(final List alleles) { - for ( final Allele a : alleles ) { - if ( a.length() != 1 ) - return false; - } - return true; - } - - /** - * clip the alleles, based on the reference, returning a ClippedAlleles object describing what happened - * - * The ClippedAlleles object contains the implied stop position of the alleles, given the provided start - * position, after clipping. It also contains the list of alleles, in the same order as the provided - * unclipped ones, that are the fully clipped version of the input alleles. If an error occurs - * during this option the getError() function returns a string describing the problem (for use in parsers). - * - * The basic operation are: - * - * single allele - * => stop == start and clipped == unclipped - * any number of single nucleotide events - * => stop == start and clipped == unclipped - * two alleles, second being symbolic - * => stop == start and clipped == unclipped - * Note in this case that the STOP should be computed by other means (from END in VCF, for example) - * Note that if there's more than two alleles and the second is a symbolic the code produces an error - * Any other case: - * The alleles are trimmed of any sequence shared at the end of the alleles. If N bases - * are common then the alleles will all be at least N bases shorter. - * The stop position returned is the start position + the length of the - * reverse trimmed only reference allele - 1. - * If the alleles all share a single common starting sequence (just one base is considered) - * then the alleles have this leading common base removed as well. - * - * TODO This code is gross and brittle and needs to be rethought from scratch - * - * @param start the unadjusted start position (pre-clipping) - * @param ref the reference string - * @param unclippedAlleles the list of unclipped alleles, including the reference allele - * @return the new reference end position of this event - */ - @Requires({"start > 0", "ref != null && ref.length() > 0", "!unclippedAlleles.isEmpty()"}) - @Ensures("result != null") - public static ClippedAlleles clipAlleles(final int start, - final String ref, - final List unclippedAlleles, - final int endForSymbolicAllele ) { - // no variation or single nucleotide events are by definition fully clipped - if ( unclippedAlleles.size() == 1 || isSingleNucleotideEvent(unclippedAlleles) ) - return new ClippedAlleles(start, unclippedAlleles, null); - - // we've got to sort out the clipping by looking at the alleles themselves - final byte firstRefBase = (byte) ref.charAt(0); - final boolean firstBaseIsClipped = shouldClipFirstBaseP(unclippedAlleles, firstRefBase); - final int forwardClipping = firstBaseIsClipped ? 1 : 0; - final int reverseClipping = computeReverseClipping(unclippedAlleles, ref.getBytes(), forwardClipping, false); - final boolean needsClipping = forwardClipping > 0 || reverseClipping > 0; - - if ( reverseClipping == -1 ) - return new ClippedAlleles("computeReverseClipping failed due to bad alleles"); - - boolean sawSymbolic = false; - List clippedAlleles; - if ( ! needsClipping ) { - // there's nothing to clip, so clippedAlleles are the original alleles - clippedAlleles = unclippedAlleles; - } else { - clippedAlleles = new ArrayList(unclippedAlleles.size()); - for ( final Allele a : unclippedAlleles ) { - if ( a.isSymbolic() ) { - sawSymbolic = true; - clippedAlleles.add(a); - } else { - final byte[] allele = Arrays.copyOfRange(a.getBases(), forwardClipping, a.getBases().length - reverseClipping); - if ( !Allele.acceptableAlleleBases(allele) ) - return new ClippedAlleles("Unparsable vcf record with bad allele [" + allele + "]"); - clippedAlleles.add(Allele.create(allele, a.isReference())); - } - } - } - - int stop = VariantContextUtils.computeEndFromAlleles(clippedAlleles, start, endForSymbolicAllele); - - // TODO - // TODO - // TODO COMPLETELY BROKEN CODE -- THE GATK CURRENTLY ENCODES THE STOP POSITION FOR CLIPPED ALLELES AS + 1 - // TODO ITS TRUE SIZE TO DIFFERENTIATE CLIPPED VS. UNCLIPPED ALLELES. NEEDS TO BE FIXED - // TODO - // TODO - if ( needsClipping && ! sawSymbolic && ! clippedAlleles.get(0).isNull() ) stop++; - // TODO - // TODO - // TODO COMPLETELY BROKEN CODE -- THE GATK CURRENTLY ENCODES THE STOP POSITION FOR CLIPPED ALLELES AS + 1 - // TODO ITS TRUE SIZE TO DIFFERENTIATE CLIPPED VS. UNCLIPPED ALLELES. NEEDS TO BE FIXED - // TODO - // TODO - - final Byte refBaseForIndel = firstBaseIsClipped ? firstRefBase : null; - return new ClippedAlleles(stop, clippedAlleles, refBaseForIndel); - } - - /** - * Returns true if the alleles in inputVC should have reference bases added for padding - * - * We need to pad a VC with a common base if the length of the reference allele is - * less than the length of the VariantContext. This happens because the position of - * e.g. an indel is always one before the actual event (as per VCF convention). - * - * @param inputVC the VC to evaluate, cannot be null - * @return true if - */ - public static boolean needsPadding(final VariantContext inputVC) { - // biallelic sites with only symbolic never need padding - if ( inputVC.isBiallelic() && inputVC.getAlternateAllele(0).isSymbolic() ) - return false; - - final int recordLength = inputVC.getEnd() - inputVC.getStart() + 1; - final int referenceLength = inputVC.getReference().length(); - - if ( referenceLength == recordLength ) - return false; - else if ( referenceLength == recordLength - 1 ) - return true; - else if ( !inputVC.hasSymbolicAlleles() ) - throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) + - " in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size"); - else if ( inputVC.isMixed() && inputVC.hasSymbolicAlleles() ) - throw new IllegalArgumentException("GATK infrastructure limitation prevents needsPadding from working properly with VariantContexts containing a mixture of symbolic and concrete alleles at " + inputVC); - return false; - } - - public static Allele padAllele(final VariantContext vc, final Allele allele) { - assert needsPadding(vc); - - if ( allele.isSymbolic() ) - return allele; - else { - // get bases for current allele and create a new one with trimmed bases - final StringBuilder sb = new StringBuilder(); - sb.append((char)vc.getReferenceBaseForIndel().byteValue()); - sb.append(allele.getDisplayString()); - final String newBases = sb.toString(); - return Allele.create(newBases, allele.isReference()); - } - } - - public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC) { - final boolean padVC = needsPadding(inputVC); - - // nothing to do if we don't need to pad bases - if ( padVC ) { - if ( !inputVC.hasReferenceBaseForIndel() ) - throw new ReviewedStingException("Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available."); - - final ArrayList alleles = new ArrayList(inputVC.getNAlleles()); - final Map unpaddedToPadded = inputVC.hasGenotypes() ? new HashMap(inputVC.getNAlleles()) : null; - - boolean paddedAtLeastOne = false; - for (final Allele a : inputVC.getAlleles()) { - final Allele padded = padAllele(inputVC, a); - paddedAtLeastOne = paddedAtLeastOne || padded != a; - alleles.add(padded); - if ( unpaddedToPadded != null ) unpaddedToPadded.put(a, padded); // conditional to avoid making unnecessary make - } - - if ( ! paddedAtLeastOne ) - throw new ReviewedStingException("VC was supposed to need padding but no allele was actually changed at location " + inputVC.getChr() + ":" + inputVC.getStart() + " with allele " + inputVC.getAlleles()); - - final VariantContextBuilder vcb = new VariantContextBuilder(inputVC); - vcb.alleles(alleles); - - // the position of the inputVC is one further, if it doesn't contain symbolic alleles - vcb.computeEndFromAlleles(alleles, inputVC.getStart(), inputVC.getEnd()); - - if ( inputVC.hasGenotypes() ) { - assert unpaddedToPadded != null; - - // now we can recreate new genotypes with trimmed alleles - final GenotypesContext genotypes = GenotypesContext.create(inputVC.getNSamples()); - for (final Genotype g : inputVC.getGenotypes() ) { - final List newGenotypeAlleles = new ArrayList(g.getAlleles().size()); - for (final Allele a : g.getAlleles()) { - newGenotypeAlleles.add( a.isCalled() ? unpaddedToPadded.get(a) : Allele.NO_CALL); - } - genotypes.add(new GenotypeBuilder(g).alleles(newGenotypeAlleles).make()); - } - vcb.genotypes(genotypes); - } - - return vcb.make(); - } - else - return inputVC; - - } - - public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) { - // see if we need to trim common reference base from all alleles - - final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, true); - if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 ) - return inputVC; - - final List alleles = new ArrayList(); - final GenotypesContext genotypes = GenotypesContext.create(); - final Map originalToTrimmedAlleleMap = new HashMap(); - - for (final Allele a : inputVC.getAlleles()) { - if (a.isSymbolic()) { - alleles.add(a); - originalToTrimmedAlleleMap.put(a, a); - } else { - // get bases for current allele and create a new one with trimmed bases - final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent); - final Allele trimmedAllele = Allele.create(newBases, a.isReference()); - alleles.add(trimmedAllele); - originalToTrimmedAlleleMap.put(a, trimmedAllele); - } - } - - // now we can recreate new genotypes with trimmed alleles - for ( final Genotype genotype : inputVC.getGenotypes() ) { - final List originalAlleles = genotype.getAlleles(); - final List trimmedAlleles = new ArrayList(); - for ( final Allele a : originalAlleles ) { - if ( a.isCalled() ) - trimmedAlleles.add(originalToTrimmedAlleleMap.get(a)); - else - trimmedAlleles.add(Allele.NO_CALL); - } - genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make()); - } - - return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() + (inputVC.isMixed() ? -1 : 0)).alleles(alleles).genotypes(genotypes).make(); - } - - @Invariant("stop != -1 || error != null") // we're either an error or a meaningful result but not both - public static class ClippedAlleles { - private final int stop; - private final List clippedAlleles; - private final Byte refBaseForIndel; - private final String error; - - @Requires({"stop > 0", "clippedAlleles != null"}) - private ClippedAlleles(final int stop, final List clippedAlleles, final Byte refBaseForIndel) { - this.stop = stop; - this.clippedAlleles = clippedAlleles; - this.error = null; - this.refBaseForIndel = refBaseForIndel; - } - - @Requires("error != null") - private ClippedAlleles(final String error) { - this.stop = -1; - this.clippedAlleles = null; - this.refBaseForIndel = null; - this.error = error; - } - - /** - * Get an error if it occurred - * @return the error message, or null if no error occurred - */ - public String getError() { - return error; - } - - /** - * Get the stop position to use after the clipping as been applied, given the - * provided position to clipAlleles - * @return - */ - public int getStop() { - return stop; - } - - /** - * Get the clipped alleles themselves - * @return the clipped alleles in the order of the input unclipped alleles - */ - public List getClippedAlleles() { - return clippedAlleles; - } - - /** - * Returns the reference base we should use for indels, or null if none is appropriate - * @return - */ - public Byte getRefBaseForIndel() { - return refBaseForIndel; - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java index 2e1770581..2c312678e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.utils.variantcontext; -import java.util.ArrayList; +import org.broadinstitute.sting.utils.BaseUtils; + import java.util.Arrays; import java.util.Collection; -import java.util.List; /** * Immutable representation of an allele @@ -77,32 +77,36 @@ public class Allele implements Comparable { private static final byte[] EMPTY_ALLELE_BASES = new byte[0]; private boolean isRef = false; - private boolean isNull = false; private boolean isNoCall = false; private boolean isSymbolic = false; private byte[] bases = null; - public final static String NULL_ALLELE_STRING = "-"; public final static String NO_CALL_STRING = "."; /** A generic static NO_CALL allele for use */ // no public way to create an allele private Allele(byte[] bases, boolean isRef) { - // standardize our representation of null allele and bases + // null alleles are no longer allowed if ( wouldBeNullAllele(bases) ) { - bases = EMPTY_ALLELE_BASES; - isNull = true; - } else if ( wouldBeNoCallAllele(bases) ) { - bases = EMPTY_ALLELE_BASES; + throw new IllegalArgumentException("Null alleles are not supported"); + } + + // no-calls are represented as no bases + if ( wouldBeNoCallAllele(bases) ) { + this.bases = EMPTY_ALLELE_BASES; isNoCall = true; if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele"); - } else if ( wouldBeSymbolicAllele(bases) ) { + return; + } + + if ( wouldBeSymbolicAllele(bases) ) { isSymbolic = true; if ( isRef ) throw new IllegalArgumentException("Cannot tag a symbolic allele as the reference allele"); } -// else -// bases = new String(bases).toUpperCase().getBytes(); // todo -- slow performance + else { + bases = BaseUtils.convertToUpperCase(bases); + } this.isRef = isRef; this.bases = bases; @@ -126,8 +130,6 @@ public class Allele implements Comparable { private final static Allele ALT_T = new Allele("T", false); private final static Allele REF_N = new Allele("N", true); private final static Allele ALT_N = new Allele("N", false); - private final static Allele REF_NULL = new Allele(NULL_ALLELE_STRING, true); - private final static Allele ALT_NULL = new Allele(NULL_ALLELE_STRING, false); public final static Allele NO_CALL = new Allele(NO_CALL_STRING, false); // --------------------------------------------------------------------------------------------------------- @@ -154,7 +156,6 @@ public class Allele implements Comparable { case '.': if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele"); return NO_CALL; - case '-': return isRef ? REF_NULL : ALT_NULL; case 'A': case 'a' : return isRef ? REF_A : ALT_A; case 'C': case 'c' : return isRef ? REF_C : ALT_C; case 'G': case 'g' : return isRef ? REF_G : ALT_G; @@ -179,14 +180,9 @@ public class Allele implements Comparable { public static Allele extend(Allele left, byte[] right) { if (left.isSymbolic()) throw new IllegalArgumentException("Cannot extend a symbolic allele"); - byte[] bases = null; - if ( left.length() == 0 ) - bases = right; - else { - bases = new byte[left.length() + right.length]; - System.arraycopy(left.getBases(), 0, bases, 0, left.length()); - System.arraycopy(right, 0, bases, left.length(), right.length); - } + byte[] bases = new byte[left.length() + right.length]; + System.arraycopy(left.getBases(), 0, bases, 0, left.length()); + System.arraycopy(right, 0, bases, left.length(), right.length); return create(bases, left.isReference()); } @@ -242,7 +238,10 @@ public class Allele implements Comparable { } public static boolean acceptableAlleleBases(byte[] bases, boolean allowNsAsAcceptable) { - if ( wouldBeNullAllele(bases) || wouldBeNoCallAllele(bases) || wouldBeSymbolicAllele(bases) ) + if ( wouldBeNullAllele(bases) ) + return false; + + if ( wouldBeNoCallAllele(bases) || wouldBeSymbolicAllele(bases) ) return true; for (byte base : bases ) { @@ -299,11 +298,6 @@ public class Allele implements Comparable { // // --------------------------------------------------------------------------------------------------------- - //Returns true if this is the null allele - public boolean isNull() { return isNull; } - // Returns true if this is not the null allele - public boolean isNonNull() { return ! isNull(); } - // Returns true if this is the NO_CALL allele public boolean isNoCall() { return isNoCall; } // Returns true if this is not the NO_CALL allele @@ -319,7 +313,7 @@ public class Allele implements Comparable { // Returns a nice string representation of this object public String toString() { - return (isNull() ? NULL_ALLELE_STRING : ( isNoCall() ? NO_CALL_STRING : getDisplayString() )) + (isReference() ? "*" : ""); + return ( isNoCall() ? NO_CALL_STRING : getDisplayString() ) + (isReference() ? "*" : ""); } /** @@ -384,27 +378,27 @@ public class Allele implements Comparable { * @return true if this and other are equal */ public boolean equals(Allele other, boolean ignoreRefState) { - return this == other || (isRef == other.isRef || ignoreRefState) && isNull == other.isNull && isNoCall == other.isNoCall && (bases == other.bases || Arrays.equals(bases, other.bases)); + return this == other || (isRef == other.isRef || ignoreRefState) && isNoCall == other.isNoCall && (bases == other.bases || Arrays.equals(bases, other.bases)); } /** * @param test bases to test against * - * @return true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles + * @return true if this Allele contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles */ public boolean basesMatch(byte[] test) { return !isSymbolic && (bases == test || Arrays.equals(bases, test)); } /** * @param test bases to test against * - * @return true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles + * @return true if this Allele contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles */ public boolean basesMatch(String test) { return basesMatch(test.toUpperCase().getBytes()); } /** * @param test allele to test against * - * @return true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles + * @return true if this Allele contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles */ public boolean basesMatch(Allele test) { return basesMatch(test.getBases()); } @@ -421,10 +415,6 @@ public class Allele implements Comparable { // // --------------------------------------------------------------------------------------------------------- - public static Allele getMatchingAllele(Collection allAlleles, String alleleBases) { - return getMatchingAllele(allAlleles, alleleBases.getBytes()); - } - public static Allele getMatchingAllele(Collection allAlleles, byte[] alleleBases) { for ( Allele a : allAlleles ) { if ( a.basesMatch(alleleBases) ) { @@ -438,26 +428,6 @@ public class Allele implements Comparable { return null; // couldn't find anything } - public static List resolveAlleles(List possibleAlleles, List alleleStrings) { - List myAlleles = new ArrayList(alleleStrings.size()); - - for ( String alleleString : alleleStrings ) { - Allele allele = getMatchingAllele(possibleAlleles, alleleString); - - if ( allele == null ) { - if ( Allele.wouldBeNoCallAllele(alleleString.getBytes()) ) { - allele = create(alleleString); - } else { - throw new IllegalArgumentException("Allele " + alleleString + " not present in the list of alleles " + possibleAlleles); - } - } - - myAlleles.add(allele); - } - - return myAlleles; - } - public int compareTo(Allele other) { if ( isReference() && other.isNonReference() ) return -1; @@ -468,9 +438,6 @@ public class Allele implements Comparable { } public static boolean oneIsPrefixOfOther(Allele a1, Allele a2) { - if ( a1.isNull() || a2.isNull() ) - return true; - if ( a2.length() >= a1.length() ) return firstIsPrefixOfSecond(a1, a2); else diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index dcdd95d00..979400350 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -188,8 +188,6 @@ public class VariantContext implements Feature { // to enable tribble integratio @Deprecated // ID is no longer stored in the attributes map private final static String ID_KEY = "ID"; - private final Byte REFERENCE_BASE_FOR_INDEL; - public final static Set PASSES_FILTERS = Collections.unmodifiableSet(new LinkedHashSet()); /** The location of this VariantContext */ @@ -228,7 +226,6 @@ public class VariantContext implements Feature { // to enable tribble integratio // --------------------------------------------------------------------------------------------------------- public enum Validation { - REF_PADDING, ALLELES, GENOTYPES } @@ -250,7 +247,7 @@ public class VariantContext implements Feature { // to enable tribble integratio this(other.getSource(), other.getID(), other.getChr(), other.getStart(), other.getEnd(), other.getAlleles(), other.getGenotypes(), other.getLog10PError(), other.getFiltersMaybeNull(), - other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL, + other.getAttributes(), other.fullyDecoded, NO_VALIDATION); } @@ -266,7 +263,6 @@ public class VariantContext implements Feature { // to enable tribble integratio * @param log10PError qual * @param filters filters: use null for unfiltered and empty set for passes filters * @param attributes attributes - * @param referenceBaseForIndel padded reference base * @param validationToPerform set of validation steps to take */ protected VariantContext(final String source, @@ -279,7 +275,6 @@ public class VariantContext implements Feature { // to enable tribble integratio final double log10PError, final Set filters, final Map attributes, - final Byte referenceBaseForIndel, final boolean fullyDecoded, final EnumSet validationToPerform ) { if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); } @@ -292,7 +287,6 @@ public class VariantContext implements Feature { // to enable tribble integratio this.ID = ID.equals(VCFConstants.EMPTY_ID_FIELD) ? VCFConstants.EMPTY_ID_FIELD : ID; this.commonInfo = new CommonInfo(source, log10PError, filters, attributes); - REFERENCE_BASE_FOR_INDEL = referenceBaseForIndel; // todo -- remove me when this check is no longer necessary if ( this.commonInfo.hasAttribute(ID_KEY) ) @@ -340,8 +334,9 @@ public class VariantContext implements Feature { // to enable tribble integratio * in this VC is returned as the set of alleles in the subContext, even if * some of those alleles aren't in the samples * - * @param sampleNames - * @return + * @param sampleNames the sample names + * @param rederiveAllelesFromGenotypes if true, returns the alleles to just those in use by the samples + * @return new VariantContext subsetting to just the given samples */ public VariantContext subContextFromSamples(Set sampleNames, final boolean rederiveAllelesFromGenotypes ) { if ( sampleNames.containsAll(getSampleNames()) ) { @@ -501,7 +496,7 @@ public class VariantContext implements Feature { // to enable tribble integratio */ public boolean isSimpleInsertion() { // can't just call !isSimpleDeletion() because of complex indels - return getType() == Type.INDEL && getReference().isNull() && isBiallelic(); + return getType() == Type.INDEL && isBiallelic() && getReference().length() == 1; } /** @@ -509,7 +504,7 @@ public class VariantContext implements Feature { // to enable tribble integratio */ public boolean isSimpleDeletion() { // can't just call !isSimpleInsertion() because of complex indels - return getType() == Type.INDEL && getAlternateAllele(0).isNull() && isBiallelic(); + return getType() == Type.INDEL && isBiallelic() && getAlternateAllele(0).length() == 1; } /** @@ -553,22 +548,6 @@ public class VariantContext implements Feature { // to enable tribble integratio return ID; } - public boolean hasReferenceBaseForIndel() { - return REFERENCE_BASE_FOR_INDEL != null; - } - - // the indel base that gets stripped off for indels - public Byte getReferenceBaseForIndel() { - return REFERENCE_BASE_FOR_INDEL; - } - - public String getAlleleStringWithRefPadding(final Allele allele) { - if ( VCFAlleleClipper.needsPadding(this) ) - return VCFAlleleClipper.padAllele(this, allele).getDisplayString(); - else - return allele.getDisplayString(); - } - // --------------------------------------------------------------------------------------------------------- // @@ -808,8 +787,8 @@ public class VariantContext implements Feature { // to enable tribble integratio * Returns a map from sampleName -> Genotype for the genotype associated with sampleName. Returns a map * for consistency with the multi-get function. * - * @param sampleName - * @return + * @param sampleName the sample name + * @return mapping from sample name to genotype * @throws IllegalArgumentException if sampleName isn't bound to a genotype */ public GenotypesContext getGenotypes(String sampleName) { @@ -823,7 +802,7 @@ public class VariantContext implements Feature { // to enable tribble integratio * For testing convenience only * * @param sampleNames a unique list of sample names - * @return + * @return subsetting genotypes context * @throws IllegalArgumentException if sampleName isn't bound to a genotype */ protected GenotypesContext getGenotypes(Collection sampleNames) { @@ -1011,13 +990,13 @@ public class VariantContext implements Feature { // to enable tribble integratio /** * Run all extra-strict validation tests on a Variant Context object * - * @param reference the true reference allele - * @param paddedRefBase the reference base used for padding indels - * @param rsIDs the true dbSNP IDs + * @param reportedReference the reported reference allele + * @param observedReference the actual reference allele + * @param rsIDs the true dbSNP IDs */ - public void extraStrictValidation(Allele reference, Byte paddedRefBase, Set rsIDs) { + public void extraStrictValidation(final Allele reportedReference, final Allele observedReference, final Set rsIDs) { // validate the reference - validateReferenceBases(reference, paddedRefBase); + validateReferenceBases(reportedReference, observedReference); // validate the RS IDs validateRSIDs(rsIDs); @@ -1032,18 +1011,9 @@ public class VariantContext implements Feature { // to enable tribble integratio //checkReferenceTrack(); } - public void validateReferenceBases(Allele reference, Byte paddedRefBase) { - if ( reference == null ) - return; - - // don't validate if we're a complex event - if ( !isComplexIndel() && !reference.isNull() && !reference.basesMatch(getReference()) ) { - throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), reference.getBaseString(), getReference().getBaseString())); - } - - // we also need to validate the padding base for simple indels - if ( hasReferenceBaseForIndel() && !getReferenceBaseForIndel().equals(paddedRefBase) ) { - throw new TribbleException.InternalCodecException(String.format("the padded REF base is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), (char)paddedRefBase.byteValue(), (char)getReferenceBaseForIndel().byteValue())); + public void validateReferenceBases(final Allele reportedReference, final Allele observedReference) { + if ( reportedReference != null && !reportedReference.basesMatch(observedReference) ) { + throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), observedReference.getBaseString(), reportedReference.getBaseString())); } } @@ -1135,7 +1105,6 @@ public class VariantContext implements Feature { // to enable tribble integratio for (final Validation val : validationToPerform ) { switch (val) { case ALLELES: validateAlleles(); break; - case REF_PADDING: validateReferencePadding(); break; case GENOTYPES: validateGenotypes(); break; default: throw new IllegalArgumentException("Unexpected validation mode " + val); } @@ -1151,8 +1120,7 @@ public class VariantContext implements Feature { // to enable tribble integratio if ( hasAttribute(VCFConstants.END_KEY) ) { final int end = getAttributeAsInt(VCFConstants.END_KEY, -1); assert end != -1; - if ( end != getEnd() && end != getEnd() + 1 ) { - // the end is allowed to 1 bigger because of the padding + if ( end != getEnd() ) { final String message = "Badly formed variant context at location " + getChr() + ":" + getStart() + "; getEnd() was " + getEnd() + " but this VariantContext contains an END key with value " + end; @@ -1161,23 +1129,19 @@ public class VariantContext implements Feature { // to enable tribble integratio else throw new ReviewedStingException(message); } + } else { + final long length = (stop - start) + 1; + if ( ! hasSymbolicAlleles() && length != getReference().length() ) { + throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this); + } } } - private void validateReferencePadding() { - if ( hasSymbolicAlleles() ) // symbolic alleles don't need padding... - return; - - boolean needsPadding = (getReference().length() == getEnd() - getStart()); // off by one because padded base was removed - - if ( needsPadding && !hasReferenceBaseForIndel() ) - throw new ReviewedStingException("Badly formed variant context at location " + getChr() + ":" + getStart() + "; no padded reference base was provided."); - } - private void validateAlleles() { - // check alleles - boolean alreadySeenRef = false, alreadySeenNull = false; - for ( Allele allele : alleles ) { + + boolean alreadySeenRef = false; + + for ( final Allele allele : alleles ) { // make sure there's only one reference allele if ( allele.isReference() ) { if ( alreadySeenRef ) throw new IllegalArgumentException("BUG: Received two reference tagged alleles in VariantContext " + alleles + " this=" + this); @@ -1187,26 +1151,11 @@ public class VariantContext implements Feature { // to enable tribble integratio if ( allele.isNoCall() ) { throw new IllegalArgumentException("BUG: Cannot add a no call allele to a variant context " + alleles + " this=" + this); } - - // make sure there's only one null allele - if ( allele.isNull() ) { - if ( alreadySeenNull ) throw new IllegalArgumentException("BUG: Received two null alleles in VariantContext " + alleles + " this=" + this); - alreadySeenNull = true; - } } // make sure there's one reference allele if ( ! alreadySeenRef ) throw new IllegalArgumentException("No reference allele found in VariantContext"); - -// if ( getType() == Type.INDEL ) { -// if ( getReference().length() != (getLocation().size()-1) ) { - long length = (stop - start) + 1; - if ( ! hasSymbolicAlleles() - && ((getReference().isNull() && length != 1 ) - || (getReference().isNonNull() && (length - getReference().length() > 1)))) { - throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this); - } } private void validateGenotypes() { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java index f2375f6f9..d8ab4bd23 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java @@ -25,9 +25,6 @@ package org.broadinstitute.sting.utils.variantcontext; import com.google.java.contract.*; -import org.broad.tribble.Feature; -import org.broad.tribble.TribbleException; -import org.broad.tribble.util.ParsingUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -74,7 +71,6 @@ public class VariantContextBuilder { private Set filters = null; private Map attributes = null; private boolean attributesCanBeModified = false; - private Byte referenceBaseForIndel = null; /** enum of what must be validated */ final private EnumSet toValidate = EnumSet.noneOf(VariantContext.Validation.class); @@ -117,7 +113,6 @@ public class VariantContextBuilder { this.genotypes = parent.genotypes; this.ID = parent.getID(); this.log10PError = parent.getLog10PError(); - this.referenceBaseForIndel = parent.getReferenceBaseForIndel(); this.source = parent.getSource(); this.start = parent.getStart(); this.stop = parent.getEnd(); @@ -132,7 +127,6 @@ public class VariantContextBuilder { this.genotypes = parent.genotypes; this.ID = parent.ID; this.log10PError = parent.log10PError; - this.referenceBaseForIndel = parent.referenceBaseForIndel; this.source = parent.source; this.start = parent.start; this.stop = parent.stop; @@ -362,21 +356,6 @@ public class VariantContextBuilder { return this; } - /** - * Tells us that the resulting VariantContext should use this byte for the reference base - * Null means no refBase is available - * @param referenceBaseForIndel - */ - public VariantContextBuilder referenceBaseForIndel(final Byte referenceBaseForIndel) { - this.referenceBaseForIndel = referenceBaseForIndel; - toValidate.add(VariantContext.Validation.REF_PADDING); - return this; - } - - public VariantContextBuilder referenceBaseForIndel(final String referenceBaseForIndel) { - return referenceBaseForIndel(referenceBaseForIndel.getBytes()[0]); - } - /** * Tells us that the resulting VariantContext should have source field set to source * @param source @@ -401,7 +380,6 @@ public class VariantContextBuilder { this.start = start; this.stop = stop; toValidate.add(VariantContext.Validation.ALLELES); - toValidate.add(VariantContext.Validation.REF_PADDING); return this; } @@ -416,7 +394,6 @@ public class VariantContextBuilder { this.start = loc.getStart(); this.stop = loc.getStop(); toValidate.add(VariantContext.Validation.ALLELES); - toValidate.add(VariantContext.Validation.REF_PADDING); return this; } @@ -440,7 +417,6 @@ public class VariantContextBuilder { public VariantContextBuilder start(final long start) { this.start = start; toValidate.add(VariantContext.Validation.ALLELES); - toValidate.add(VariantContext.Validation.REF_PADDING); return this; } @@ -517,6 +493,6 @@ public class VariantContextBuilder { public VariantContext make() { return new VariantContext(source, ID, contig, start, stop, alleles, genotypes, log10PError, filters, attributes, - referenceBaseForIndel, fullyDecoded, toValidate); + fullyDecoded, toValidate); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index d7e072980..a8f956413 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -64,9 +64,9 @@ public class VariantContextUtils { * Ensures that VC contains all of the samples in allSamples by adding missing samples to * the resulting VC with default diploid ./. genotypes * - * @param vc - * @param allSamples - * @return + * @param vc the VariantContext + * @param allSamples all of the samples needed + * @return a new VariantContext with missing samples added */ public static VariantContext addMissingSamples(final VariantContext vc, final Set allSamples) { // TODO -- what's the fastest way to do this calculation? @@ -376,9 +376,9 @@ public class VariantContextUtils { /** * @deprecated use variant context builder version instead - * @param vc - * @param keysToPreserve - * @return + * @param vc the variant context + * @param keysToPreserve the keys to preserve + * @return a pruned version of the original variant context */ @Deprecated public static VariantContext pruneVariantContext(final VariantContext vc, Collection keysToPreserve ) { @@ -486,14 +486,13 @@ public class VariantContextUtils { if ( genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE ) verifyUniqueSampleNames(unsortedVCs); - final List prepaddedVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions); + final List preFilteredVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions); // Make sure all variant contexts are padded with reference base in case of indels if necessary final List VCs = new ArrayList(); - for (final VariantContext vc : prepaddedVCs) { - // also a reasonable place to remove filtered calls, if needed + for (final VariantContext vc : preFilteredVCs) { if ( ! filteredAreUncalled || vc.isNotFiltered() ) - VCs.add(VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc)); + VCs.add(vc); } if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled return null; @@ -547,9 +546,6 @@ public class VariantContextUtils { filters.addAll(vc.getFilters()); - if ( referenceBaseForIndel == null ) - referenceBaseForIndel = vc.getReferenceBaseForIndel(); - // // add attributes // @@ -661,10 +657,9 @@ public class VariantContextUtils { builder.genotypes(genotypes); builder.log10PError(log10PError); builder.filters(filters).attributes(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes); - builder.referenceBaseForIndel(referenceBaseForIndel); // Trim the padded bases of all alleles if necessary - final VariantContext merged = createVariantContextWithTrimmedAlleles(builder.make()); + final VariantContext merged = builder.make(); if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged); return merged; } @@ -700,73 +695,6 @@ public class VariantContextUtils { return true; } - private static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) { - // see if we need to trim common reference base from all alleles - boolean trimVC; - - // We need to trim common reference base from all alleles in all genotypes if a ref base is common to all alleles - Allele refAllele = inputVC.getReference(); - if (!inputVC.isVariant()) - trimVC = false; - else if (refAllele.isNull()) - trimVC = false; - else { - trimVC = VCFAlleleClipper.shouldClipFirstBaseP(inputVC.getAlternateAlleles(), (byte) inputVC.getReference().getDisplayString().charAt(0)); - } - - // nothing to do if we don't need to trim bases - if (trimVC) { - List alleles = new ArrayList(); - GenotypesContext genotypes = GenotypesContext.create(); - - Map originalToTrimmedAlleleMap = new HashMap(); - - for (final Allele a : inputVC.getAlleles()) { - if (a.isSymbolic()) { - alleles.add(a); - originalToTrimmedAlleleMap.put(a, a); - } else { - // get bases for current allele and create a new one with trimmed bases - byte[] newBases = Arrays.copyOfRange(a.getBases(), 1, a.length()); - Allele trimmedAllele = Allele.create(newBases, a.isReference()); - alleles.add(trimmedAllele); - originalToTrimmedAlleleMap.put(a, trimmedAllele); - } - } - - // detect case where we're trimming bases but resulting vc doesn't have any null allele. In that case, we keep original representation - // example: mixed records such as {TA*,TGA,TG} - boolean hasNullAlleles = false; - - for (final Allele a: originalToTrimmedAlleleMap.values()) { - if (a.isNull()) - hasNullAlleles = true; - } - - if (!hasNullAlleles) - return inputVC; - // now we can recreate new genotypes with trimmed alleles - for ( final Genotype genotype : inputVC.getGenotypes() ) { - - List originalAlleles = genotype.getAlleles(); - List trimmedAlleles = new ArrayList(); - for ( final Allele a : originalAlleles ) { - if ( a.isCalled() ) - trimmedAlleles.add(originalToTrimmedAlleleMap.get(a)); - else - trimmedAlleles.add(Allele.NO_CALL); - } - genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make()); - - } - - final VariantContextBuilder builder = new VariantContextBuilder(inputVC); - return builder.alleles(alleles).genotypes(genotypes).referenceBaseForIndel(new Byte(inputVC.getReference().getBases()[0])).make(); - } - - return inputVC; - } - public static GenotypesContext stripPLs(GenotypesContext genotypes) { GenotypesContext newGs = GenotypesContext.create(genotypes.size()); @@ -819,7 +747,7 @@ public class VariantContextUtils { if ( !mappedVCs.containsKey(vc.getType()) ) mappedVCs.put(vc.getType(), new ArrayList()); mappedVCs.get(vc.getType()).add(vc); - } + } } return mappedVCs; @@ -881,10 +809,10 @@ public class VariantContextUtils { // // refAllele: ACGTGA // myRef: ACGT - // myAlt: - + // myAlt: A // // We need to remap all of the alleles in vc to include the extra GA so that - // myRef => refAllele and myAlt => GA + // myRef => refAllele and myAlt => AGA // Allele myRef = vc.getReference(); @@ -979,7 +907,7 @@ public class VariantContextUtils { HashMap alleleMap = new HashMap(vc.getAlleles().size()); for ( Allele originalAllele : vc.getAlleles() ) { Allele newAllele; - if ( originalAllele.isNoCall() || originalAllele.isNull() ) + if ( originalAllele.isNoCall() ) newAllele = originalAllele; else newAllele = Allele.create(BaseUtils.simpleReverseComplement(originalAllele.getBases()), originalAllele.isReference()); @@ -1235,13 +1163,14 @@ public class VariantContextUtils { if ( ! vc.isIndel() ) // only indels are tandem repeats return null; - final Allele ref = vc.getReference(); + final Allele refAllele = vc.getReference(); + final byte[] refAlleleBases = Arrays.copyOfRange(refAllele.getBases(), 1, refAllele.length()); byte[] repeatUnit = null; final ArrayList lengths = new ArrayList(); for ( final Allele allele : vc.getAlternateAlleles() ) { - Pair result = getNumTandemRepeatUnits(ref.getBases(), allele.getBases(), refBasesStartingAtVCWithoutPad.getBytes()); + Pair result = getNumTandemRepeatUnits(refAlleleBases, Arrays.copyOfRange(allele.getBases(), 1, allele.length()), refBasesStartingAtVCWithoutPad.getBytes()); final int[] repetitionCount = result.first; // repetition count = 0 means allele is not a tandem expansion of context @@ -1256,7 +1185,7 @@ public class VariantContextUtils { repeatUnit = result.second; if (VERBOSE) { System.out.println("RefContext:"+refBasesStartingAtVCWithoutPad); - System.out.println("Ref:"+ref.toString()+" Count:" + String.valueOf(repetitionCount[0])); + System.out.println("Ref:"+refAllele.toString()+" Count:" + String.valueOf(repetitionCount[0])); System.out.println("Allele:"+allele.toString()+" Count:" + String.valueOf(repetitionCount[1])); System.out.println("RU:"+new String(repeatUnit)); } @@ -1405,4 +1334,113 @@ public class VariantContextUtils { return start + Math.max(ref.length() - 1, 0); } } + + public static boolean requiresPaddingBase(final List alleles) { + + // see whether one of the alleles would be null if trimmed through + + for ( final String allele : alleles ) { + if ( allele.isEmpty() ) + return true; + } + + int clipping = 0; + Character currentBase = null; + + while ( true ) { + for ( final String allele : alleles ) { + if ( allele.length() - clipping == 0 ) + return true; + + char myBase = allele.charAt(clipping); + if ( currentBase == null ) + currentBase = myBase; + else if ( currentBase != myBase ) + return false; + } + + clipping++; + currentBase = null; + } + } + + public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) { + + // TODO - this function doesn't work with mixed records or records that started as mixed and then became non-mixed + + // see whether we need to trim common reference base from all alleles + + final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, false); + if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 ) + return inputVC; + + final List alleles = new ArrayList(); + final GenotypesContext genotypes = GenotypesContext.create(); + final Map originalToTrimmedAlleleMap = new HashMap(); + + for (final Allele a : inputVC.getAlleles()) { + if (a.isSymbolic()) { + alleles.add(a); + originalToTrimmedAlleleMap.put(a, a); + } else { + // get bases for current allele and create a new one with trimmed bases + final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent); + final Allele trimmedAllele = Allele.create(newBases, a.isReference()); + alleles.add(trimmedAllele); + originalToTrimmedAlleleMap.put(a, trimmedAllele); + } + } + + // now we can recreate new genotypes with trimmed alleles + for ( final Genotype genotype : inputVC.getGenotypes() ) { + final List originalAlleles = genotype.getAlleles(); + final List trimmedAlleles = new ArrayList(); + for ( final Allele a : originalAlleles ) { + if ( a.isCalled() ) + trimmedAlleles.add(originalToTrimmedAlleleMap.get(a)); + else + trimmedAlleles.add(Allele.NO_CALL); + } + genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make()); + } + + return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() - 1).alleles(alleles).genotypes(genotypes).make(); + } + + public static int computeReverseClipping(final List unclippedAlleles, + final byte[] ref, + final int forwardClipping, + final boolean allowFullClip) { + int clipping = 0; + boolean stillClipping = true; + + while ( stillClipping ) { + for ( final Allele a : unclippedAlleles ) { + if ( a.isSymbolic() ) + continue; + + // we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong + // position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine). + if ( a.length() - clipping == 0 ) + return clipping - (allowFullClip ? 0 : 1); + + if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) { + stillClipping = false; + } + else if ( ref.length == clipping ) { + if ( allowFullClip ) + stillClipping = false; + else + return -1; + } + else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) { + stillClipping = false; + } + } + if ( stillClipping ) + clipping++; + } + + return clipping; + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java index df2008e8e..b5da206ad 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java @@ -274,10 +274,7 @@ class BCF2Writer extends IndexingVariantContextWriter { } private void buildAlleles( VariantContext vc ) throws IOException { - final boolean needsPadding = VCFAlleleClipper.needsPadding(vc); for ( Allele allele : vc.getAlleles() ) { - if ( needsPadding ) - allele = VCFAlleleClipper.padAllele(vc, allele); final byte[] s = allele.getDisplayBases(); if ( s == null ) throw new ReviewedStingException("BUG: BCF2Writer encountered null padded allele" + allele); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java index 4548e026e..ea968e153 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java @@ -162,7 +162,6 @@ class VCFWriter extends IndexingVariantContextWriter { vc = new VariantContextBuilder(vc).noGenotypes().make(); try { - vc = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc); super.add(vc); Map alleleMap = buildAlleleMap(vc); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java index 1c5db4262..4611f3a40 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java @@ -26,7 +26,7 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest { WalkerTestSpec spec2 = new WalkerTestSpec( "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380 -L 1:10,093,447-10,093,847 -L 1:10,271,252-10,271,452 -o %s", 1, - Arrays.asList("0567b32ebdc26604ddf2a390de4579ac")); + Arrays.asList("ef481be9962e21d09847b8a1d4a4ff65")); executeTest("testFastaAlternateReferenceIndels", spec2); WalkerTestSpec spec3 = new WalkerTestSpec( diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java index 77769a5fe..17149220a 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java @@ -38,9 +38,6 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; import java.util.*; @@ -103,39 +100,27 @@ public class ArtificialReadPileupTestProvider { boolean addBaseErrors, int phredScaledBaseErrorRate) { // RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext); - - ArrayList vcAlleles = new ArrayList(); - Allele refAllele, altAllele; - if (eventLength == 0) {// SNP case - refAllele =Allele.create(referenceContext.getBase(),true); - altAllele = Allele.create(altBases.substring(0,1), false); + String refAllele, altAllele; + if (eventLength == 0) { + // SNP case + refAllele = new String(new byte[]{referenceContext.getBase()}); + altAllele = new String(altBases.substring(0,1)); } else if (eventLength>0){ // insertion - refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true); - altAllele = Allele.create(altBases.substring(0,eventLength), false); + refAllele = ""; + altAllele = altBases.substring(0,eventLength); } else { // deletion - refAllele =Allele.create(refBases.substring(offset,offset+Math.abs(eventLength)),true); - altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false); + refAllele = refBases.substring(offset,offset+Math.abs(eventLength)); + altAllele = ""; } - int stop = loc.getStart(); - vcAlleles.add(refAllele); - vcAlleles.add(altAllele); - - final VariantContextBuilder builder = new VariantContextBuilder().source(""); - builder.loc(loc.getContig(), loc.getStart(), stop); - builder.alleles(vcAlleles); - builder.referenceBaseForIndel(referenceContext.getBase()); - builder.noGenotypes(); - - final VariantContext vc = builder.make(); Map contexts = new HashMap(); for (String sample: sampleNames) { - AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc,vc, altBases, numReadsPerAllele, sample, addBaseErrors, phredScaledBaseErrorRate)); + AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc, refAllele, altAllele, altBases, numReadsPerAllele, sample, addBaseErrors, phredScaledBaseErrorRate)); contexts.put(sample,context); } @@ -149,73 +134,71 @@ public class ArtificialReadPileupTestProvider { rg.setSample(name); return rg; } - private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, VariantContext vc, String altBases, + + private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, String refAllele, String altAllele, String altBases, int[] numReadsPerAllele, String sample, boolean addErrors, int phredScaledErrorRate) { List pileupElements = new ArrayList(); - int readStart = contigStart; int offset = (contigStop-contigStart+1)/2; - int refAlleleLength = 0; - int readCounter = 0; - int alleleCounter = 0; - for (Allele allele: vc.getAlleles()) { - if (allele.isReference()) - refAlleleLength = allele.getBases().length; - - int alleleLength = allele.getBases().length; - - for ( int d = 0; d < numReadsPerAllele[alleleCounter]; d++ ) { - byte[] readBases = trueHaplotype(allele, offset, refAlleleLength); - if (addErrors) - addBaseErrors(readBases, phredScaledErrorRate); - - byte[] readQuals = new byte[readBases.length]; - Arrays.fill(readQuals, (byte)phredScaledErrorRate); - - GATKSAMRecord read = new GATKSAMRecord(header); - read.setBaseQualities(readQuals); - read.setReadBases(readBases); - read.setReadName(artificialReadName+readCounter++); - - boolean isBeforeDeletion = false, isBeforeInsertion = false; - if (allele.isReference()) - read.setCigarString(readBases.length + "M"); - else { - isBeforeDeletion = alleleLengthrefAlleleLength; - if (isBeforeDeletion || isBeforeInsertion) - read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") + - (readBases.length-offset)+"M"); - else // SNP case - read.setCigarString(readBases.length+"M"); - } - - int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0)); - read.setReadPairedFlag(false); - read.setAlignmentStart(readStart); - read.setMappingQuality(artificialMappingQuality); - read.setReferenceName(loc.getContig()); - read.setReadNegativeStrandFlag(false); - read.setAttribute("RG", sampleRG(sample).getReadGroupId()); - - - pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength)); - } - alleleCounter++; - } + int refAlleleLength = refAllele.length(); + pileupElements.addAll(createPileupElements(refAllele, loc, numReadsPerAllele[0], sample, contigStart, offset, altBases, addErrors, phredScaledErrorRate, refAlleleLength, true)); + pileupElements.addAll(createPileupElements(altAllele, loc, numReadsPerAllele[1], sample, contigStart, offset, altBases, addErrors, phredScaledErrorRate, refAlleleLength, false)); return new ReadBackedPileupImpl(loc,pileupElements); } - private byte[] trueHaplotype(Allele allele, int offset, int refAlleleLength) { + private List createPileupElements(String allele, GenomeLoc loc, int numReadsPerAllele, String sample, int readStart, int offset, String altBases, boolean addErrors, int phredScaledErrorRate, int refAlleleLength, boolean isReference) { + + int alleleLength = allele.length(); + List pileupElements = new ArrayList(); + + int readCounter = 0; + for ( int d = 0; d < numReadsPerAllele; d++ ) { + byte[] readBases = trueHaplotype(allele, offset, refAlleleLength); + if (addErrors) + addBaseErrors(readBases, phredScaledErrorRate); + + byte[] readQuals = new byte[readBases.length]; + Arrays.fill(readQuals, (byte)phredScaledErrorRate); + + GATKSAMRecord read = new GATKSAMRecord(header); + read.setBaseQualities(readQuals); + read.setReadBases(readBases); + read.setReadName(artificialReadName+readCounter++); + + boolean isBeforeDeletion = false, isBeforeInsertion = false; + if (isReference) + read.setCigarString(readBases.length + "M"); + else { + isBeforeDeletion = alleleLengthrefAlleleLength; + if (isBeforeDeletion || isBeforeInsertion) + read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") + + (readBases.length-offset)+"M"); + else // SNP case + read.setCigarString(readBases.length+"M"); + } + + int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0)); + read.setReadPairedFlag(false); + read.setAlignmentStart(readStart); + read.setMappingQuality(artificialMappingQuality); + read.setReferenceName(loc.getContig()); + read.setReadNegativeStrandFlag(false); + read.setAttribute("RG", sampleRG(sample).getReadGroupId()); + + + pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength)); + } + + return pileupElements; + } + + private byte[] trueHaplotype(String allele, int offset, int refAlleleLength) { // create haplotype based on a particular allele - String prefix = refBases.substring(offset); - String alleleBases = new String(allele.getBases()); + String prefix = refBases.substring(0, offset); String postfix = refBases.substring(offset+refAlleleLength,refBases.length()); - return (prefix+alleleBases+postfix).getBytes(); - - - + return (prefix+allele+postfix).getBytes(); } private void addBaseErrors(final byte[] readBases, final int phredScaledErrorRate) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java index c7ef51d0c..dfd4bc525 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java @@ -70,7 +70,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest { List alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases); Assert.assertEquals(alleles.size(),2); - Assert.assertEquals(alleles.get(1).getBaseString(), altBases.substring(0,eventLength)); + Assert.assertEquals(alleles.get(1).getBaseString().substring(1), altBases.substring(0,eventLength)); @@ -79,7 +79,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest { eventLength = 3; alleles = getConsensusAlleles(eventLength,false,10,0.1, altBases); Assert.assertEquals(alleles.size(),2); - Assert.assertEquals(alleles.get(0).getBaseString(), refBases.substring(pileupProvider.offset,pileupProvider.offset+eventLength)); + Assert.assertEquals(alleles.get(0).getBaseString().substring(1), refBases.substring(pileupProvider.offset,pileupProvider.offset+eventLength)); // same with min Reads = 11 alleles = getConsensusAlleles(eventLength,false,11,0.1, altBases); @@ -97,7 +97,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest { alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases); Assert.assertEquals(alleles.size(),2); - Assert.assertEquals(alleles.get(1).getBaseString(), altBases.substring(0,eventLength)); + Assert.assertEquals(alleles.get(1).getBaseString().substring(1), altBases.substring(0,eventLength)); altBases = "CCTCNTGAGA"; eventLength = 5; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java index 7a849a819..80eda5ed9 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java @@ -23,7 +23,7 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest { testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF; testArgs += " --virtualPrimerSize 30"; WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, - Arrays.asList("27f9450afa132888a8994167f0035fd7")); + Arrays.asList("240d99b58f73985fb114abe9044c0271")); executeTest("Test probes", spec); } @@ -36,7 +36,7 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest { testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF; testArgs += " --virtualPrimerSize 30 --doNotUseBWA"; WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, - Arrays.asList("f2611ff1d9cd5bedaad003251fed8bc1")); + Arrays.asList("6e7789445e29d91979a21e78d3d53295")); executeTest("Test probes", spec); } @@ -49,7 +49,7 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest { testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF; testArgs += " --virtualPrimerSize 30 --filterMonomorphic"; WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, - Arrays.asList("77b3f30e38fedad812125bdf6cf3255f")); + Arrays.asList("18d7236208db603e143b40db06ef2aca")); executeTest("Test probes", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index bbee99ba6..3b60fa2c2 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -98,16 +98,16 @@ public class CombineVariantsIntegrationTest extends WalkerTest { @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "ac58a5fde17661e2a19004ca954d9781", " -setKey null"); } @Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "67a8076e30b4bca0ea5acdc9cd26a4e0"); } // official project VCF files in tabix format - @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "ef2d249ea4b25311966e038aac05c661"); } - @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "cdb448aaa92ca5a9e393d875b42581b3"); } + @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "909c6dc74eeb5ab86f8e74073eb0c1d6"); } + @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "f0c2cb3e3a6160e1ed0ee2fd9b120f55"); } @Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "f0ce3fb83d4ad9ba402d7cb11cd000c3"); } @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "4efdf983918db822e4ac13d911509576"); } // official project VCF files in tabix format @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "848d4408ee953053d2307cefebc6bd6d"); } // official project VCF files in tabix format - @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "91f6087e6e2bf3df4d1c9700eaff958b"); } + @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "4159a0c0d7c15852a3a545e0bea6bbc5"); } - @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "a9be239ab5e03e7e97caef58a3841dd2"); } + @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "61d0ded244895234ac727391f29f13a8"); } @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "0b1815c699e71e143ed129bfadaffbcb"); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java index 3277f5060..6a3d755d7 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java @@ -125,4 +125,14 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { executeTest("test bad ref allele in deletion", spec); } + @Test + public void testComplexEvents() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString("complexEvents.vcf", "ALL"), + 0, + Arrays.asList("d41d8cd98f00b204e9800998ecf8427e") + ); + + executeTest("test validating complex events", spec); + } } diff --git a/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java index ec08d97c5..161eefa8f 100644 --- a/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java @@ -53,11 +53,11 @@ public class HaplotypeUnitTest extends BaseTest { h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M)); final Cigar h1Cigar = new Cigar(h1CigarList); String h1bases = "AACTTCTGGTCAACTGGTCAACTGGTCAACTGGTCA"; - basicInsertTest("-", "ACTT", 1, h1Cigar, bases, h1bases); + basicInsertTest("A", "AACTT", 1, h1Cigar, bases, h1bases); h1bases = "ACTGGTCACTTAACTGGTCAACTGGTCAACTGGTCA"; - basicInsertTest("-", "ACTT", 7, h1Cigar, bases, h1bases); + basicInsertTest("A", "AACTT", 7, h1Cigar, bases, h1bases); h1bases = "ACTGGTCAACTGGTCAAACTTCTGGTCAACTGGTCA"; - basicInsertTest("-", "ACTT", 17, h1Cigar, bases, h1bases); + basicInsertTest("A", "AACTT", 17, h1Cigar, bases, h1bases); } @Test @@ -68,11 +68,11 @@ public class HaplotypeUnitTest extends BaseTest { h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M)); final Cigar h1Cigar = new Cigar(h1CigarList); String h1bases = "ATCAACTGGTCAACTGGTCAACTGGTCA"; - basicInsertTest("ACTT", "-", 1, h1Cigar, bases, h1bases); + basicInsertTest("AACTT", "A", 1, h1Cigar, bases, h1bases); h1bases = "ACTGGTCGGTCAACTGGTCAACTGGTCA"; - basicInsertTest("ACTT", "-", 7, h1Cigar, bases, h1bases); + basicInsertTest("AACTT", "A", 7, h1Cigar, bases, h1bases); h1bases = "ACTGGTCAACTGGTCAATCAACTGGTCA"; - basicInsertTest("ACTT", "-", 17, h1Cigar, bases, h1bases); + basicInsertTest("AACTT", "A", 17, h1Cigar, bases, h1bases); } @Test @@ -102,11 +102,11 @@ public class HaplotypeUnitTest extends BaseTest { h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M)); final Cigar h1Cigar = new Cigar(h1CigarList); String h1bases = "AACTTTCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; - basicInsertTest("-", "ACTT", 1, h1Cigar, bases, h1bases); + basicInsertTest("A", "AACTT", 1, h1Cigar, bases, h1bases); h1bases = "ATCG" + "CCGGCCGGCC" + "ATCACTTGATCG" + "AGGGGGA" + "AGGC"; - basicInsertTest("-", "ACTT", 7, h1Cigar, bases, h1bases); + basicInsertTest("A", "AACTT", 7, h1Cigar, bases, h1bases); h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGACTTGGGGA" + "AGGC"; - basicInsertTest("-", "ACTT", 17, h1Cigar, bases, h1bases); + basicInsertTest("A", "AACTT", 17, h1Cigar, bases, h1bases); } @Test @@ -121,11 +121,11 @@ public class HaplotypeUnitTest extends BaseTest { h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M)); final Cigar h1Cigar = new Cigar(h1CigarList); String h1bases = "A" + "CGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; - basicInsertTest("ACTT", "-", 1, h1Cigar, bases, h1bases); + basicInsertTest("AACTT", "A", 1, h1Cigar, bases, h1bases); h1bases = "ATCG" + "CCGGCCGGCC" + "ATCG" + "AGGGGGA" + "AGGC"; - basicInsertTest("ACTT", "-", 7, h1Cigar, bases, h1bases); + basicInsertTest("AACTT", "A", 7, h1Cigar, bases, h1bases); h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGA" + "AGGC"; - basicInsertTest("ACTT", "-", 17, h1Cigar, bases, h1bases); + basicInsertTest("AACTT", "A", 17, h1Cigar, bases, h1bases); } @Test diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipperUnitTest.java deleted file mode 100644 index 8cd051e01..000000000 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipperUnitTest.java +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -package org.broadinstitute.sting.utils.codecs.vcf; - -import com.google.java.contract.Requires; -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.variantcontext.*; -import org.testng.Assert; -import org.testng.SkipException; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.*; - -public class VCFAlleleClipperUnitTest extends BaseTest { - // -------------------------------------------------------------------------------- - // - // Test allele clipping - // - // -------------------------------------------------------------------------------- - - private class ClipAllelesTest extends TestDataProvider { - final int position; - final int stop; - final String ref; - List inputs; - List expected; - - @Requires("arg.length % 2 == 0") - private ClipAllelesTest(final int position, final int stop, final String ... arg) { - super(ClipAllelesTest.class); - this.position = position; - this.stop = stop; - this.ref = arg[0]; - - int n = arg.length / 2; - inputs = new ArrayList(n); - expected = new ArrayList(n); - - for ( int i = 0; i < n; i++ ) { - final boolean ref = i % n == 0; - inputs.add(Allele.create(arg[i], ref)); - } - for ( int i = n; i < arg.length; i++ ) { - final boolean ref = i % n == 0; - expected.add(Allele.create(arg[i], ref)); - } - } - - public boolean isClipped() { - for ( int i = 0; i < inputs.size(); i++ ) { - if ( inputs.get(i).length() != expected.get(i).length() ) - return true; - } - - return false; - } - - public String toString() { - return String.format("ClipAllelesTest input=%s expected=%s", inputs, expected); - } - } - @DataProvider(name = "ClipAllelesTest") - public Object[][] makeClipAllelesTest() { - // do no harm - new ClipAllelesTest(10, 10, "A", "A"); - new ClipAllelesTest(10, 10, "A", "C", "A", "C"); - new ClipAllelesTest(10, 10, "A", "C", "G", "A", "C", "G"); - - // insertions - new ClipAllelesTest(10, 10, "A", "AA", "-", "A"); - new ClipAllelesTest(10, 10, "A", "AAA", "-", "AA"); - new ClipAllelesTest(10, 10, "A", "AG", "-", "G"); - - // deletions - new ClipAllelesTest(10, 11, "AA", "A", "A", "-"); - new ClipAllelesTest(10, 12, "AAA", "A", "AA", "-"); - new ClipAllelesTest(10, 11, "AG", "A", "G", "-"); - new ClipAllelesTest(10, 12, "AGG", "A", "GG", "-"); - - // multi-allelic insertion and deletions - new ClipAllelesTest(10, 11, "AA", "A", "AAA", "A", "-", "AA"); - new ClipAllelesTest(10, 11, "AA", "A", "AAG", "A", "-", "AG"); - new ClipAllelesTest(10, 10, "A", "AA", "AAA", "-", "A", "AA"); - new ClipAllelesTest(10, 10, "A", "AA", "ACA", "-", "A", "CA"); - new ClipAllelesTest(10, 12, "ACG", "ATC", "AGG", "CG", "TC", "GG"); - new ClipAllelesTest(10, 11, "AC", "AT", "AG", "C", "T", "G"); - - // cannot be clipped - new ClipAllelesTest(10, 11, "AC", "CT", "AG", "AC", "CT", "AG"); - new ClipAllelesTest(10, 11, "AC", "CT", "GG", "AC", "CT", "GG"); - - // symbolic - new ClipAllelesTest(10, 100, "A", "", "A", ""); - new ClipAllelesTest(50, 50, "G", "G]22:60]", "G", "G]22:60]"); - new ClipAllelesTest(51, 51, "T", "]22:55]T", "T", "]22:55]T"); - new ClipAllelesTest(52, 52, "C", "C[22:51[", "C", "C[22:51["); - new ClipAllelesTest(60, 60, "A", "A]22:50]", "A", "A]22:50]"); - - // symbolic with alleles that should be clipped - new ClipAllelesTest(10, 100, "A", "", "AA", "-", "", "A"); - new ClipAllelesTest(10, 100, "AA", "", "A", "A", "", "-"); - new ClipAllelesTest(10, 100, "AA", "", "A", "AAA", "A", "", "-", "AA"); - new ClipAllelesTest(10, 100, "AG", "", "A", "AGA", "G", "", "-", "GA"); - new ClipAllelesTest(10, 100, "G", "", "A", "G", "", "A"); - - // clipping from both ends - // - // TODO -- THIS CODE IS BROKEN BECAUSE CLIPPING DOES WORK WITH ALLELES CLIPPED FROM THE END - // -// new ClipAllelesTest(10, 10, "ATA", "ATTA", "-", "T"); -// new ClipAllelesTest(10, 10, "ATAA", "ATTAA", "-", "T"); -// new ClipAllelesTest(10, 10, "ATAAG", "ATTAAG", "-", "T"); -// new ClipAllelesTest(10, 11, "GTA", "ATTA", "G", "AT"); -// new ClipAllelesTest(10, 11, "GTAA", "ATTAA", "G", "AT"); -// new ClipAllelesTest(10, 11, "GTAAG", "ATTAAG", "G", "AT"); - - // complex substitutions - new ClipAllelesTest(10, 10, "A", "GA", "A", "GA"); - - return ClipAllelesTest.getTests(ClipAllelesTest.class); - } - - @Test(dataProvider = "ClipAllelesTest") - public void testClipAllelesTest(ClipAllelesTest cfg) { - final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(cfg.position, cfg.ref, cfg.inputs, cfg.stop); - Assert.assertNull(clipped.getError(), "Unexpected error occurred"); - Assert.assertEquals(clipped.getStop(), cfg.stop, "Clipped alleles stop"); - Assert.assertEquals(clipped.getClippedAlleles(), cfg.expected, "Clipped alleles"); - } - - @Test(dataProvider = "ClipAllelesTest", dependsOnMethods = "testClipAllelesTest") - public void testPaddingAllelesInVC(final ClipAllelesTest cfg) { - final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(cfg.position, cfg.ref, cfg.inputs, cfg.stop); - final VariantContext vc = new VariantContextBuilder("x", "1", cfg.position, cfg.stop, clipped.getClippedAlleles()) - .referenceBaseForIndel(clipped.getRefBaseForIndel()).make(); - - if ( vc.isMixed() && vc.hasSymbolicAlleles() ) - throw new SkipException("GATK cannot handle mixed variant contexts with symbolic and concrete alleles. Remove this check when allele clipping and padding is generalized"); - - Assert.assertEquals(VCFAlleleClipper.needsPadding(vc), cfg.isClipped(), "needPadding method"); - - if ( cfg.isClipped() ) { - // TODO - // TODO note that the GATK currently uses a broken approach to the clipped alleles, so the expected stop is - // TODO actually the original stop, as the original stop is +1 its true size. - // TODO - final int expectedStop = vc.getEnd(); // + (vc.hasSymbolicAlleles() ? 0 : 1); - - final VariantContext padded = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc); - Assert.assertEquals(padded.getStart(), vc.getStart(), "padded VC start"); - Assert.assertEquals(padded.getAlleles(), cfg.inputs, "padded VC alleles == original unclipped alleles"); - Assert.assertEquals(padded.getEnd(), expectedStop, "padded VC end should be clipped VC + 1 (added a base to ref allele)"); - Assert.assertFalse(VCFAlleleClipper.needsPadding(padded), "padded VC shouldn't need padding again"); - } - } - - // -------------------------------------------------------------------------------- - // - // basic allele clipping test - // - // -------------------------------------------------------------------------------- - - private class ReverseClippingPositionTestProvider extends TestDataProvider { - final String ref; - final List alleles = new ArrayList(); - final int expectedClip; - - private ReverseClippingPositionTestProvider(final int expectedClip, final String ref, final String... alleles) { - super(ReverseClippingPositionTestProvider.class); - this.ref = ref; - for ( final String allele : alleles ) - this.alleles.add(Allele.create(allele)); - this.expectedClip = expectedClip; - } - - @Override - public String toString() { - return String.format("ref=%s allele=%s reverse clip %d", ref, alleles, expectedClip); - } - } - - @DataProvider(name = "ReverseClippingPositionTestProvider") - public Object[][] makeReverseClippingPositionTestProvider() { - // pair clipping - new ReverseClippingPositionTestProvider(0, "ATT", "CCG"); - new ReverseClippingPositionTestProvider(1, "ATT", "CCT"); - new ReverseClippingPositionTestProvider(2, "ATT", "CTT"); - new ReverseClippingPositionTestProvider(2, "ATT", "ATT"); // cannot completely clip allele - - // triplets - new ReverseClippingPositionTestProvider(0, "ATT", "CTT", "CGG"); - new ReverseClippingPositionTestProvider(1, "ATT", "CTT", "CGT"); // the T can go - new ReverseClippingPositionTestProvider(2, "ATT", "CTT", "CTT"); // both Ts can go - - return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class); - } - - - @Test(dataProvider = "ReverseClippingPositionTestProvider") - public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) { - int result = VCFAlleleClipper.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false); - Assert.assertEquals(result, cfg.expectedClip); - } -} diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java index e9b845d59..3948ba971 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java @@ -39,6 +39,17 @@ public class VCFIntegrationTest extends WalkerTest { executeTest("Test reading and writing breakpoint VCF", spec1); } + @Test(enabled = true) + public void testReadingLowerCaseBases() { + String testVCF = privateTestDir + "lowercaseBases.vcf"; + + String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s "; + + String test1 = baseCommand + "-T SelectVariants -V " + testVCF; + WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("e0e308a25e56bde1c664139bb44ed19d")); + executeTest("Test reading VCF with lower-case bases", spec1); + } + @Test(enabled = true) public void testReadingAndWriting1000GSVs() { String testVCF = privateTestDir + "1000G_SVs.chr1.vcf"; @@ -57,7 +68,7 @@ public class VCFIntegrationTest extends WalkerTest { String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s "; String test1 = baseCommand + "-T SelectVariants -V " + testVCF; - WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("0f82ac11852e7f958c1a0ce52398c2ae")); + WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("38697c195e7abf18d95dcc16c8e6d284")); executeTest("Test reading and writing samtools vcf", spec1); } @@ -66,7 +77,7 @@ public class VCFIntegrationTest extends WalkerTest { String testVCF = privateTestDir + "ex2.vcf"; String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s "; String test1 = baseCommand + "-T SelectVariants -V " + testVCF; - WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("9773d6a121cfcb18d090965bc520f120")); + WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("a04a0fc22fedb516c663e56e51fc1e27")); executeTest("Test writing samtools WEx BCF example", spec1); } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/AlleleUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/AlleleUnitTest.java index ed9805d19..65398c373 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/AlleleUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/AlleleUnitTest.java @@ -37,8 +37,6 @@ import org.testng.annotations.Test; // public Allele(byte[] bases, boolean isRef) { // public Allele(boolean isRef) { // public Allele(String bases, boolean isRef) { -// public boolean isNullAllele() { return length() == 0; } -// public boolean isNonNullAllele() { return ! isNullAllele(); } // public boolean isReference() { return isRef; } // public boolean isNonReference() { return ! isReference(); } // public byte[] getBases() { return bases; } @@ -49,13 +47,10 @@ import org.testng.annotations.Test; * Basic unit test for RecalData */ public class AlleleUnitTest { - Allele ARef, del, delRef, A, T, ATIns, ATCIns, NoCall; + Allele ARef, A, T, ATIns, ATCIns, NoCall; @BeforeSuite public void before() { - del = Allele.create("-"); - delRef = Allele.create("-", true); - A = Allele.create("A"); ARef = Allele.create("A", true); T = Allele.create("T"); @@ -72,8 +67,6 @@ public class AlleleUnitTest { Assert.assertFalse(A.isReference()); Assert.assertTrue(A.basesMatch("A")); Assert.assertEquals(A.length(), 1); - Assert.assertTrue(A.isNonNull()); - Assert.assertFalse(A.isNull()); Assert.assertTrue(ARef.isReference()); Assert.assertFalse(ARef.isNonReference()); @@ -92,8 +85,8 @@ public class AlleleUnitTest { Assert.assertFalse(NoCall.isReference()); Assert.assertFalse(NoCall.basesMatch(".")); Assert.assertEquals(NoCall.length(), 0); - Assert.assertTrue(NoCall.isNonNull()); - Assert.assertFalse(NoCall.isNull()); + Assert.assertTrue(NoCall.isNoCall()); + Assert.assertFalse(NoCall.isCalled()); } @@ -103,16 +96,6 @@ public class AlleleUnitTest { Assert.assertEquals(ATCIns.length(), 3); Assert.assertEquals(ATIns.getBases(), "AT".getBytes()); Assert.assertEquals(ATCIns.getBases(), "ATC".getBytes()); - - Assert.assertTrue(del.isNonReference()); - Assert.assertFalse(delRef.isNonReference()); - Assert.assertFalse(del.isReference()); - Assert.assertTrue(delRef.isReference()); - Assert.assertFalse(del.basesMatch("-")); - Assert.assertTrue(del.basesMatch("")); - Assert.assertEquals(del.length(), 0); - Assert.assertFalse(del.isNonNull()); - Assert.assertTrue(del.isNull()); } @@ -128,18 +111,6 @@ public class AlleleUnitTest { Assert.assertFalse(a1.equals(a4)); } - @Test - public void testDelConstructors() { - Allele a1 = Allele.create("-"); - Allele a2 = Allele.create("-".getBytes()); - Allele a3 = Allele.create(""); - Allele a4 = Allele.create("", true); - - Assert.assertTrue(a1.equals(a2)); - Assert.assertTrue(a1.equals(a3)); - Assert.assertFalse(a1.equals(a4)); - } - @Test public void testInsConstructors() { Allele a1 = Allele.create("AC"); @@ -156,7 +127,6 @@ public class AlleleUnitTest { public void testEquals() { Assert.assertTrue(ARef.basesMatch(A)); Assert.assertFalse(ARef.equals(A)); - Assert.assertFalse(ARef.equals(del)); Assert.assertFalse(ARef.equals(ATIns)); Assert.assertFalse(ARef.equals(ATCIns)); @@ -164,11 +134,6 @@ public class AlleleUnitTest { Assert.assertFalse(T.basesMatch(A)); Assert.assertFalse(T.equals(A)); - Assert.assertTrue(del.basesMatch(del)); - Assert.assertTrue(del.basesMatch(delRef)); - Assert.assertTrue(del.equals(del)); - Assert.assertFalse(del.equals(delRef)); - Assert.assertTrue(ATIns.equals(ATIns)); Assert.assertFalse(ATIns.equals(ATCIns)); Assert.assertTrue(ATIns.basesMatch("AT")); @@ -209,7 +174,6 @@ public class AlleleUnitTest { public void testExtend() { Assert.assertEquals("AT", Allele.extend(Allele.create("A"), "T".getBytes()).toString()); Assert.assertEquals("ATA", Allele.extend(Allele.create("A"), "TA".getBytes()).toString()); - Assert.assertEquals("A", Allele.extend(Allele.create("-"), "A".getBytes()).toString()); Assert.assertEquals("A", Allele.extend(Allele.NO_CALL, "A".getBytes()).toString()); Assert.assertEquals("ATCGA", Allele.extend(Allele.create("AT"), "CGA".getBytes()).toString()); Assert.assertEquals("ATCGA", Allele.extend(Allele.create("ATC"), "GA".getBytes()).toString()); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java index 1a0e8e39d..b95e589b7 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java @@ -225,10 +225,10 @@ public class VariantContextTestProvider { add(builder()); add(builder().alleles("A")); add(builder().alleles("A", "C", "T")); - add(builder().alleles("-", "C").referenceBaseForIndel("A")); - add(builder().alleles("-", "CAGT").referenceBaseForIndel("A")); - add(builder().loc("1", 10, 11).alleles("C", "-").referenceBaseForIndel("A")); - add(builder().loc("1", 10, 13).alleles("CGT", "-").referenceBaseForIndel("A")); + add(builder().alleles("A", "AC")); + add(builder().alleles("A", "ACAGT")); + add(builder().loc("1", 10, 11).alleles("AC", "A")); + add(builder().loc("1", 10, 13).alleles("ACGT", "A")); // make sure filters work add(builder().unfiltered()); @@ -302,8 +302,8 @@ public class VariantContextTestProvider { sites.add(builder().alleles("A").make()); sites.add(builder().alleles("A", "C", "T").make()); - sites.add(builder().alleles("-", "C").referenceBaseForIndel("A").make()); - sites.add(builder().alleles("-", "CAGT").referenceBaseForIndel("A").make()); + sites.add(builder().alleles("A", "AC").make()); + sites.add(builder().alleles("A", "ACAGT").make()); for ( VariantContext site : sites ) { addGenotypes(site); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java index 1d290118f..272166c68 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java @@ -28,27 +28,22 @@ public class VariantContextUnitTest extends BaseTest { int snpLocStart = 10; int snpLocStop = 10; - // - / ATC [ref] from 20-23 + // - / ATC [ref] from 20-22 String delLoc = "chr1"; int delLocStart = 20; - int delLocStop = 23; + int delLocStop = 22; // - [ref] / ATC from 20-20 String insLoc = "chr1"; int insLocStart = 20; int insLocStop = 20; - // - / A / T / ATC [ref] from 20-23 - String mixedLoc = "chr1"; - int mixedLocStart = 20; - int mixedLocStop = 23; - VariantContextBuilder basicBuilder, snpBuilder, insBuilder; @BeforeSuite public void before() { - del = Allele.create("-"); - delRef = Allele.create("-", true); + del = Allele.create("A"); + delRef = Allele.create("A", true); A = Allele.create("A"); C = Allele.create("C"); @@ -62,9 +57,9 @@ public class VariantContextUnitTest extends BaseTest { @BeforeMethod public void beforeTest() { - basicBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)).referenceBaseForIndel((byte)'A'); - snpBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)).referenceBaseForIndel((byte)'A'); - insBuilder = new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATC)).referenceBaseForIndel((byte)'A'); + basicBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)); + snpBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)); + insBuilder = new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATC)); } @Test @@ -213,7 +208,7 @@ public class VariantContextUnitTest extends BaseTest { @Test public void testCreatingDeletionVariantContext() { List alleles = Arrays.asList(ATCref, del); - VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).referenceBaseForIndel((byte)'A').make(); + VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).make(); Assert.assertEquals(vc.getChr(), delLoc); Assert.assertEquals(vc.getStart(), delLocStart); @@ -240,8 +235,8 @@ public class VariantContextUnitTest extends BaseTest { @Test public void testMatchingAlleles() { List alleles = Arrays.asList(ATCref, del); - VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).referenceBaseForIndel((byte)'A').make(); - VariantContext vc2 = new VariantContextBuilder("test2", delLoc, delLocStart+12, delLocStop+12, alleles).referenceBaseForIndel((byte)'A').make(); + VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).make(); + VariantContext vc2 = new VariantContextBuilder("test2", delLoc, delLocStart+12, delLocStop+12, alleles).make(); Assert.assertTrue(vc.hasSameAllelesAs(vc2)); Assert.assertTrue(vc.hasSameAlternateAllelesAs(vc2)); @@ -386,13 +381,13 @@ public class VariantContextUnitTest extends BaseTest { @Test public void testAccessingCompleteGenotypes() { - List alleles = Arrays.asList(Aref, T, del); + List alleles = Arrays.asList(Aref, T, ATC); Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref)); Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T)); Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T)); - Genotype g4 = GenotypeBuilder.create("Td", Arrays.asList(T, del)); - Genotype g5 = GenotypeBuilder.create("dd", Arrays.asList(del, del)); + Genotype g4 = GenotypeBuilder.create("Td", Arrays.asList(T, ATC)); + Genotype g5 = GenotypeBuilder.create("dd", Arrays.asList(ATC, ATC)); Genotype g6 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)); VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles) @@ -408,7 +403,7 @@ public class VariantContextUnitTest extends BaseTest { Assert.assertEquals(10, vc.getCalledChrCount()); Assert.assertEquals(3, vc.getCalledChrCount(Aref)); Assert.assertEquals(4, vc.getCalledChrCount(T)); - Assert.assertEquals(3, vc.getCalledChrCount(del)); + Assert.assertEquals(3, vc.getCalledChrCount(ATC)); Assert.assertEquals(2, vc.getCalledChrCount(Allele.NO_CALL)); } @@ -416,7 +411,7 @@ public class VariantContextUnitTest extends BaseTest { public void testAccessingRefGenotypes() { List alleles1 = Arrays.asList(Aref, T); List alleles2 = Arrays.asList(Aref); - List alleles3 = Arrays.asList(Aref, T, del); + List alleles3 = Arrays.asList(Aref, T); for ( List alleles : Arrays.asList(alleles1, alleles2, alleles3)) { Genotype g1 = GenotypeBuilder.create("AA1", Arrays.asList(Aref, Aref)); Genotype g2 = GenotypeBuilder.create("AA2", Arrays.asList(Aref, Aref)); @@ -438,7 +433,7 @@ public class VariantContextUnitTest extends BaseTest { @Test public void testFilters() { - List alleles = Arrays.asList(Aref, T, del); + List alleles = Arrays.asList(Aref, T); Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref)); Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T)); @@ -470,15 +465,15 @@ public class VariantContextUnitTest extends BaseTest { @Test public void testRepeatAllele() { - Allele nullR = Allele.create(Allele.NULL_ALLELE_STRING, true); - Allele nullA = Allele.create(Allele.NULL_ALLELE_STRING, false); - Allele atc = Allele.create("ATC", false); - Allele atcatc = Allele.create("ATCATC", false); - Allele ccccR = Allele.create("CCCC", true); - Allele cc = Allele.create("CC", false); - Allele cccccc = Allele.create("CCCCCC", false); - Allele gagaR = Allele.create("GAGA", true); - Allele gagagaga = Allele.create("GAGAGAGA", false); + Allele nullR = Allele.create("A", true); + Allele nullA = Allele.create("A", false); + Allele atc = Allele.create("AATC", false); + Allele atcatc = Allele.create("AATCATC", false); + Allele ccccR = Allele.create("ACCCC", true); + Allele cc = Allele.create("ACC", false); + Allele cccccc = Allele.create("ACCCCCC", false); + Allele gagaR = Allele.create("AGAGA", true); + Allele gagagaga = Allele.create("AGAGAGAGA", false); Pair,byte[]> result; byte[] refBytes = "TATCATCATCGGA".getBytes(); @@ -497,15 +492,15 @@ public class VariantContextUnitTest extends BaseTest { Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("AATAATA".getBytes()),7); - // -*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4 + // A*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4 VariantContext vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(nullR,atc)).make(); result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes); Assert.assertEquals(result.getFirst().toArray()[0],3); Assert.assertEquals(result.getFirst().toArray()[1],4); Assert.assertEquals(result.getSecond().length,3); - // ATC*,-,ATCATC - vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(ATCref,nullA,atcatc)).make(); + // ATC*,A,ATCATC + vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+3, Arrays.asList(Allele.create("AATC", true),nullA,atcatc)).make(); result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes); Assert.assertEquals(result.getFirst().toArray()[0],3); Assert.assertEquals(result.getFirst().toArray()[1],2); @@ -522,7 +517,7 @@ public class VariantContextUnitTest extends BaseTest { // CCCC*,CC,-,CCCCCC, context = CCC: (C)7 -> (C)5,(C)3,(C)9 refBytes = "TCCCCCCCAGAGAGAG".getBytes(); - vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(ccccR,cc, nullA,cccccc)).make(); + vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(ccccR,cc, nullA,cccccc)).make(); result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes); Assert.assertEquals(result.getFirst().toArray()[0],7); Assert.assertEquals(result.getFirst().toArray()[1],5); @@ -532,7 +527,7 @@ public class VariantContextUnitTest extends BaseTest { // GAGA*,-,GAGAGAGA refBytes = "TGAGAGAGAGATTT".getBytes(); - vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(gagaR, nullA,gagagaga)).make(); + vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(gagaR, nullA,gagagaga)).make(); result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes); Assert.assertEquals(result.getFirst().toArray()[0],5); Assert.assertEquals(result.getFirst().toArray()[1],3); @@ -564,27 +559,24 @@ public class VariantContextUnitTest extends BaseTest { @Test public void testVCFfromGenotypes() { - List alleles = Arrays.asList(Aref, T, del); + List alleles = Arrays.asList(Aref, T); Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref)); Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T)); Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T)); Genotype g4 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)); - Genotype g5 = GenotypeBuilder.create("--", Arrays.asList(del, del)); - VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make(); + VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4).make(); VariantContext vc12 = vc.subContextFromSamples(new HashSet(Arrays.asList(g1.getSampleName(), g2.getSampleName())), true); VariantContext vc1 = vc.subContextFromSamples(new HashSet(Arrays.asList(g1.getSampleName())), true); VariantContext vc23 = vc.subContextFromSamples(new HashSet(Arrays.asList(g2.getSampleName(), g3.getSampleName())), true); VariantContext vc4 = vc.subContextFromSamples(new HashSet(Arrays.asList(g4.getSampleName())), true); VariantContext vc14 = vc.subContextFromSamples(new HashSet(Arrays.asList(g1.getSampleName(), g4.getSampleName())), true); - VariantContext vc5 = vc.subContextFromSamples(new HashSet(Arrays.asList(g5.getSampleName())), true); Assert.assertTrue(vc12.isPolymorphicInSamples()); Assert.assertTrue(vc23.isPolymorphicInSamples()); Assert.assertTrue(vc1.isMonomorphicInSamples()); Assert.assertTrue(vc4.isMonomorphicInSamples()); Assert.assertTrue(vc14.isMonomorphicInSamples()); - Assert.assertTrue(vc5.isPolymorphicInSamples()); Assert.assertTrue(vc12.isSNP()); Assert.assertTrue(vc12.isVariant()); @@ -606,17 +598,11 @@ public class VariantContextUnitTest extends BaseTest { Assert.assertFalse(vc14.isVariant()); Assert.assertFalse(vc14.isBiallelic()); - Assert.assertTrue(vc5.isIndel()); - Assert.assertTrue(vc5.isSimpleDeletion()); - Assert.assertTrue(vc5.isVariant()); - Assert.assertTrue(vc5.isBiallelic()); - Assert.assertEquals(3, vc12.getCalledChrCount(Aref)); Assert.assertEquals(1, vc23.getCalledChrCount(Aref)); Assert.assertEquals(2, vc1.getCalledChrCount(Aref)); Assert.assertEquals(0, vc4.getCalledChrCount(Aref)); Assert.assertEquals(2, vc14.getCalledChrCount(Aref)); - Assert.assertEquals(0, vc5.getCalledChrCount(Aref)); } public void testGetGenotypeMethods() { @@ -664,13 +650,12 @@ public class VariantContextUnitTest extends BaseTest { @DataProvider(name = "getAlleles") public Object[][] mergeAllelesData() { new GetAllelesTest("A*", Aref); - new GetAllelesTest("-*", delRef); new GetAllelesTest("A*/C", Aref, C); new GetAllelesTest("A*/C/T", Aref, C, T); new GetAllelesTest("A*/T/C", Aref, T, C); - new GetAllelesTest("A*/C/T/-", Aref, C, T, del); - new GetAllelesTest("A*/T/C/-", Aref, T, C, del); - new GetAllelesTest("A*/-/T/C", Aref, del, T, C); + new GetAllelesTest("A*/C/T/ATC", Aref, C, T, ATC); + new GetAllelesTest("A*/T/C/ATC", Aref, T, C, ATC); + new GetAllelesTest("A*/ATC/T/C", Aref, ATC, T, C); return GetAllelesTest.getTests(GetAllelesTest.class); } @@ -678,7 +663,7 @@ public class VariantContextUnitTest extends BaseTest { @Test(dataProvider = "getAlleles") public void testMergeAlleles(GetAllelesTest cfg) { final List altAlleles = cfg.alleles.subList(1, cfg.alleles.size()); - final VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, cfg.alleles).referenceBaseForIndel((byte)'A').make(); + final VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, cfg.alleles).make(); Assert.assertEquals(vc.getAlleles(), cfg.alleles, "VC alleles not the same as input alleles"); Assert.assertEquals(vc.getNAlleles(), cfg.alleles.size(), "VC getNAlleles not the same as input alleles size"); @@ -845,7 +830,6 @@ public class VariantContextUnitTest extends BaseTest { Assert.assertEquals(sub.getLog10PError(), vc.getLog10PError()); Assert.assertEquals(sub.getFilters(), vc.getFilters()); Assert.assertEquals(sub.getID(), vc.getID()); - Assert.assertEquals(sub.getReferenceBaseForIndel(), vc.getReferenceBaseForIndel()); Assert.assertEquals(sub.getAttributes(), vc.getAttributes()); Set expectedGenotypes = new HashSet(); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index b09a10d07..95e8458c8 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -39,7 +39,7 @@ import java.io.FileNotFoundException; import java.util.*; public class VariantContextUtilsUnitTest extends BaseTest { - Allele Aref, T, C, delRef, Cref, ATC, ATCATC; + Allele Aref, T, C, Cref, ATC, ATCATC; private GenomeLocParser genomeLocParser; @BeforeSuite @@ -56,7 +56,6 @@ public class VariantContextUtilsUnitTest extends BaseTest { // alleles Aref = Allele.create("A", true); Cref = Allele.create("C", true); - delRef = Allele.create("-", true); T = Allele.create("T"); C = Allele.create("C"); ATC = Allele.create("ATC"); @@ -99,7 +98,7 @@ public class VariantContextUtilsUnitTest extends BaseTest { private VariantContext makeVC(String source, List alleles, Collection genotypes, Set filters) { int start = 10; int stop = start; // alleles.contains(ATC) ? start + 3 : start; - return new VariantContextBuilder(source, "1", start, stop, alleles).genotypes(genotypes).filters(filters).referenceBaseForIndel(Cref.getBases()[0]).make(); + return new VariantContextBuilder(source, "1", start, stop, alleles).genotypes(genotypes).filters(filters).make(); } // -------------------------------------------------------------------------------- @@ -156,28 +155,23 @@ public class VariantContextUtilsUnitTest extends BaseTest { Arrays.asList(Aref, C), Arrays.asList(Aref, T, C)); // in order of appearence - // The following is actually a pathological case - there's no way on a vcf to represent a null allele that's non-variant. - // The code converts this (correctly) to a single-base non-variant vc with whatever base was there as a reference. - new MergeAllelesTest(Arrays.asList(delRef), - Arrays.asList(Cref)); + new MergeAllelesTest(Arrays.asList(Aref), + Arrays.asList(Aref, ATC), + Arrays.asList(Aref, ATC)); - new MergeAllelesTest(Arrays.asList(delRef), - Arrays.asList(delRef, ATC), - Arrays.asList(delRef, ATC)); - - new MergeAllelesTest(Arrays.asList(delRef), - Arrays.asList(delRef, ATC, ATCATC), - Arrays.asList(delRef, ATC, ATCATC)); + new MergeAllelesTest(Arrays.asList(Aref), + Arrays.asList(Aref, ATC, ATCATC), + Arrays.asList(Aref, ATC, ATCATC)); // alleles in the order we see them - new MergeAllelesTest(Arrays.asList(delRef, ATCATC), - Arrays.asList(delRef, ATC, ATCATC), - Arrays.asList(delRef, ATCATC, ATC)); + new MergeAllelesTest(Arrays.asList(Aref, ATCATC), + Arrays.asList(Aref, ATC, ATCATC), + Arrays.asList(Aref, ATCATC, ATC)); // same - new MergeAllelesTest(Arrays.asList(delRef, ATC), - Arrays.asList(delRef, ATCATC), - Arrays.asList(delRef, ATC, ATCATC)); + new MergeAllelesTest(Arrays.asList(Aref, ATC), + Arrays.asList(Aref, ATCATC), + Arrays.asList(Aref, ATC, ATCATC)); return MergeAllelesTest.getTests(MergeAllelesTest.class); } @@ -661,4 +655,52 @@ public class VariantContextUtilsUnitTest extends BaseTest { // test alleles are equal Assert.assertEquals(VariantContextUtils.isTandemRepeat(cfg.vc, cfg.ref.getBytes()), cfg.isTrueRepeat); } + + // -------------------------------------------------------------------------------- + // + // basic allele clipping test + // + // -------------------------------------------------------------------------------- + + private class ReverseClippingPositionTestProvider extends TestDataProvider { + final String ref; + final List alleles = new ArrayList(); + final int expectedClip; + + private ReverseClippingPositionTestProvider(final int expectedClip, final String ref, final String... alleles) { + super(ReverseClippingPositionTestProvider.class); + this.ref = ref; + for ( final String allele : alleles ) + this.alleles.add(Allele.create(allele)); + this.expectedClip = expectedClip; + } + + @Override + public String toString() { + return String.format("ref=%s allele=%s reverse clip %d", ref, alleles, expectedClip); + } + } + + @DataProvider(name = "ReverseClippingPositionTestProvider") + public Object[][] makeReverseClippingPositionTestProvider() { + // pair clipping + new ReverseClippingPositionTestProvider(0, "ATT", "CCG"); + new ReverseClippingPositionTestProvider(1, "ATT", "CCT"); + new ReverseClippingPositionTestProvider(2, "ATT", "CTT"); + new ReverseClippingPositionTestProvider(2, "ATT", "ATT"); // cannot completely clip allele + + // triplets + new ReverseClippingPositionTestProvider(0, "ATT", "CTT", "CGG"); + new ReverseClippingPositionTestProvider(1, "ATT", "CTT", "CGT"); // the T can go + new ReverseClippingPositionTestProvider(2, "ATT", "CTT", "CTT"); // both Ts can go + + return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class); + } + + + @Test(dataProvider = "ReverseClippingPositionTestProvider") + public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) { + int result = VariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false); + Assert.assertEquals(result, cfg.expectedClip); + } } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContextUnitTest.java index 6f5756bdc..8f03f1d38 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContextUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContextUnitTest.java @@ -56,7 +56,7 @@ public class VariantJEXLContextUnitTest extends BaseTest { Allele A, Aref, T, Tref; - Allele del, delRef, ATC, ATCref; + Allele ATC, ATCref; // A [ref] / T at 10 GenomeLoc snpLoc; @@ -84,9 +84,6 @@ public class VariantJEXLContextUnitTest extends BaseTest { @BeforeMethod public void before() { - del = Allele.create("-"); - delRef = Allele.create("-", true); - A = Allele.create("A"); Aref = Allele.create("A", true); T = Allele.create("T"); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java index a7fff4559..5876efa12 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java @@ -139,8 +139,8 @@ public class VCFWriterUnitTest extends BaseTest { Map attributes = new HashMap(); GenotypesContext genotypes = GenotypesContext.create(header.getGenotypeSamples().size()); - alleles.add(Allele.create("-",true)); - alleles.add(Allele.create("CC",false)); + alleles.add(Allele.create("A",true)); + alleles.add(Allele.create("ACC",false)); attributes.put("DP","50"); for (String name : header.getGenotypeSamples()) { @@ -148,7 +148,7 @@ public class VCFWriterUnitTest extends BaseTest { genotypes.add(gt); } return new VariantContextBuilder("RANDOM", loc.getContig(), loc.getStart(), loc.getStop(), alleles) - .genotypes(genotypes).attributes(attributes).referenceBaseForIndel((byte)'A').make(); + .genotypes(genotypes).attributes(attributes).make(); }