From fd3bf36ed93753cf1ff51bc91b500315ce2a7b25 Mon Sep 17 00:00:00 2001 From: Ami Levy Moonshine Date: Fri, 13 Jul 2012 14:23:50 -0400 Subject: [PATCH 001/176] changed license --- licensing/GATK1_LICENSE | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/licensing/GATK1_LICENSE b/licensing/GATK1_LICENSE index 648ec8fc3..080ea1e68 100644 --- a/licensing/GATK1_LICENSE +++ b/licensing/GATK1_LICENSE @@ -1,4 +1,6 @@ -Copyright (c) 2012 The Broad Institute +Copyright (c) 2013 The Broad Institute + +Extra text Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation From 5d0a7335ead24af1b63a92c1846cd8f3f6a7c344 Mon Sep 17 00:00:00 2001 From: Ami Levy Moonshine Date: Fri, 13 Jul 2012 15:19:45 -0400 Subject: [PATCH 002/176] remove unnecessary use in the PRIORITY list remove unneeded imports --- .../sting/gatk/walkers/variantutils/CombineVariants.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 94fa29173..c7bc790a9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -35,7 +35,6 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCounts; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -185,7 +184,8 @@ public class CombineVariants extends RodWalker { logger.warn("VCF output file not an instance of VCFWriterStub; cannot enable sites only output option"); if ( PRIORITY_STRING == null ) { - PRIORITY_STRING = Utils.join(",", vcfRods.keySet()); + genotypeMergeOption = VariantContextUtils.GenotypeMergeType.UNSORTED; + //PRIORITY_STRING = Utils.join(",", vcfRods.keySet()); Deleted by Ami (7/10/12) logger.info("Priority string not provided, using arbitrary genotyping order: " + PRIORITY_STRING); } From 32516a2f603cab4d77a9ac36e14dcd5be621ec15 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 26 Jul 2012 01:50:39 -0400 Subject: [PATCH 007/176] Initial checkpoint commit of VariantContext/Allele refactoring. There were just too many problems associated with the different representation of alleles in VCF (padded) vs. VariantContext (unpadded). We are moving VC to use the VCF representation. No more reference base for indels in VC and no more trimming and padding of alleles. Even reverse trimming has been stopped (the theory being that writers of VCF now know what they are doing and often want the reverse padding if they put it there; this has been requested on GetSatisfaction). Code compiles but presumably pretty much all tests with indels with fail at this point. --- ...olGenotypeLikelihoodsCalculationModel.java | 1 - .../haplotypecaller/GenotypingEngine.java | 28 +- .../GenotypingEngineUnitTest.java | 46 +- .../gatk/refdata/VariantContextAdaptors.java | 72 +-- .../annotator/DepthPerAlleleBySample.java | 57 +-- .../walkers/beagle/BeagleOutputToVCF.java | 2 - .../walkers/beagle/ProduceBeagleInput.java | 2 +- .../beagle/VariantsToBeagleUnphased.java | 2 +- .../genotyper/ConsensusAlleleCounter.java | 12 +- ...elGenotypeLikelihoodsCalculationModel.java | 2 +- .../genotyper/UnifiedGenotyperEngine.java | 14 +- .../walkers/indels/SomaticIndelDetector.java | 28 +- .../validationsiteselector/GenomeEvent.java | 8 +- .../KeepAFSpectrumFrequencySelector.java | 2 +- .../UniformSamplingFrequencySelector.java | 2 +- .../evaluators/ThetaVariantEvaluator.java | 2 +- .../variantutils/LeftAlignVariants.java | 2 +- .../variantutils/LiftoverVariants.java | 8 +- .../variantutils/ValidateVariants.java | 39 +- .../walkers/variantutils/VariantsToTable.java | 11 +- .../walkers/variantutils/VariantsToVCF.java | 14 +- .../broadinstitute/sting/utils/BaseUtils.java | 31 ++ .../sting/utils/codecs/bcf2/BCF2Codec.java | 23 - .../utils/codecs/vcf/AbstractVCFCodec.java | 31 +- .../utils/codecs/vcf/VCFAlleleClipper.java | 434 ------------------ .../sting/utils/variantcontext/Allele.java | 67 +-- .../utils/variantcontext/VariantContext.java | 96 +--- .../variantcontext/VariantContextBuilder.java | 26 +- .../variantcontext/VariantContextUtils.java | 94 +--- .../variantcontext/writer/BCF2Writer.java | 3 - .../variantcontext/writer/VCFWriter.java | 1 - .../utils/variantcontext/AlleleUnitTest.java | 10 +- .../VariantContextUnitTest.java | 1 - 33 files changed, 218 insertions(+), 953 deletions(-) delete mode 100644 public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipper.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsCalculationModel.java index 37b676601..3e0bdd2ea 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsCalculationModel.java @@ -90,7 +90,6 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi return new VariantContextBuilder("pc",referenceSampleVC.getChr(), referenceSampleVC.getStart(), referenceSampleVC.getEnd(), referenceSampleVC.getAlleles()) - .referenceBaseForIndel(referenceSampleVC.getReferenceBaseForIndel()) .genotypes(new GenotypeBuilder(UAC.referenceSampleName, referenceAlleles).GQ(referenceGenotype.getGQ()).make()) .make(); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index e2445e926..ad468f657 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -33,7 +33,6 @@ import org.apache.commons.lang.ArrayUtils; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext; import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.codecs.vcf.VCFAlleleClipper; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.variantcontext.*; @@ -419,8 +418,8 @@ public class GenotypingEngine { protected static VariantContext createMergedVariantContext( final VariantContext thisVC, final VariantContext nextVC, final byte[] ref, final GenomeLoc refLoc ) { final int thisStart = thisVC.getStart(); final int nextStart = nextVC.getStart(); - byte[] refBases = ( thisVC.hasReferenceBaseForIndel() ? new byte[]{ thisVC.getReferenceBaseForIndel() } : new byte[]{} ); - byte[] altBases = ( thisVC.hasReferenceBaseForIndel() ? new byte[]{ thisVC.getReferenceBaseForIndel() } : new byte[]{} ); + byte[] refBases = ( new byte[]{} ); + byte[] altBases = ( new byte[]{} ); refBases = ArrayUtils.addAll(refBases, thisVC.getReference().getBases()); altBases = ArrayUtils.addAll(altBases, thisVC.getAlternateAllele(0).getBases()); for( int locus = thisStart + refBases.length; locus < nextStart; locus++ ) { @@ -428,15 +427,11 @@ public class GenotypingEngine { refBases = ArrayUtils.add(refBases, refByte); altBases = ArrayUtils.add(altBases, refByte); } - if( nextVC.hasReferenceBaseForIndel() ) { - refBases = ArrayUtils.add(refBases, nextVC.getReferenceBaseForIndel()); - altBases = ArrayUtils.add(altBases, nextVC.getReferenceBaseForIndel()); - } refBases = ArrayUtils.addAll(refBases, nextVC.getReference().getBases()); altBases = ArrayUtils.addAll(altBases, nextVC.getAlternateAllele(0).getBases()); int iii = 0; - if( refBases.length == altBases.length && VCFAlleleClipper.needsPadding(thisVC) ) { // special case of insertion + deletion of same length creates an MNP --> trim padding bases off the allele + if( refBases.length == altBases.length ) { // special case of insertion + deletion of same length creates an MNP --> trim padding bases off the allele while( iii < refBases.length && refBases[iii] == altBases[iii] ) { iii++; } } final ArrayList mergedAlleles = new ArrayList(); @@ -530,10 +525,10 @@ public class GenotypingEngine { final int elementLength = ce.getLength(); switch( ce.getOperator() ) { case I: - final byte[] insertionBases = Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength ); + final byte[] insertionBases = Arrays.copyOfRange( alignment, alignmentPos - 1, alignmentPos + elementLength ); // add padding base boolean allN = true; - for( final byte b : insertionBases ) { - if( b != (byte) 'N' ) { + for( int i = 1; i < insertionBases.length; i++ ) { // check all bases except for the padding base + if( insertionBases[i] != (byte) 'N' ) { allN = false; break; } @@ -541,14 +536,13 @@ public class GenotypingEngine { if( !allN ) { final ArrayList insertionAlleles = new ArrayList(); final int insertionStart = refLoc.getStart() + refPos - 1; + insertionAlleles.add( Allele.create(ref[refPos-1], true) ); if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) { - insertionAlleles.add( Allele.create(ref[refPos-1], true) ); insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE ); vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make()); } else { - insertionAlleles.add( Allele.create(Allele.NULL_ALLELE_STRING, true) ); insertionAlleles.add( Allele.create(insertionBases, false) ); - vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).referenceBaseForIndel(ref[refPos-1]).make()); + vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make()); } } @@ -558,7 +552,7 @@ public class GenotypingEngine { alignmentPos += elementLength; break; case D: - final byte[] deletionBases = Arrays.copyOfRange( ref, refPos, refPos + elementLength ); + final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base final ArrayList deletionAlleles = new ArrayList(); final int deletionStart = refLoc.getStart() + refPos - 1; // BUGBUG: how often does this symbolic deletion allele case happen? @@ -569,8 +563,8 @@ public class GenotypingEngine { // vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart, deletionAlleles).make()); //} else { deletionAlleles.add( Allele.create(deletionBases, true) ); - deletionAlleles.add( Allele.create(Allele.NULL_ALLELE_STRING, false) ); - vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).referenceBaseForIndel(ref[refPos-1]).make()); + deletionAlleles.add( Allele.create(ref[refPos-1], false) ); + vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).make()); //} refPos += elementLength; break; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java index 04bb3a753..4bcf5a0a0 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java @@ -262,8 +262,6 @@ public class GenotypingEngineUnitTest extends BaseTest { Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); - Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel()); - Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel()); // SNP + ref + SNP = MNP with ref base gap thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make(); @@ -274,11 +272,9 @@ public class GenotypingEngineUnitTest extends BaseTest { Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); - Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel()); - Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel()); // insertion + SNP - thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","AAAAA").referenceBaseForIndel("T").make(); + thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TAAAAA").make(); nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make(); truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TAAAAACG").source("merged").make(); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); @@ -286,23 +282,19 @@ public class GenotypingEngineUnitTest extends BaseTest { Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); - Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel()); - Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel()); // SNP + insertion thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make(); - nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("-","AAAAA").referenceBaseForIndel("C").make(); + nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","CAAAAA").make(); truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","GCCAAAAA").source("merged").make(); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); logger.warn(truthVC + " == " + mergedVC); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); - Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel()); - Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel()); // deletion + SNP - thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("C","-").referenceBaseForIndel("T").make(); + thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","T").make(); nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","G").make(); truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TG").source("merged").make(); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); @@ -310,68 +302,56 @@ public class GenotypingEngineUnitTest extends BaseTest { Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); - Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel()); - Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel()); // SNP + deletion thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","G").make(); - nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make(); + nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make(); truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","GCC").source("merged").make(); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); logger.warn(truthVC + " == " + mergedVC); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); - Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel()); - Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel()); // insertion + deletion = MNP - thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","A").referenceBaseForIndel("T").make(); - nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make(); + thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TA").make(); + nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make(); truthVC = new VariantContextBuilder().loc("2", 1704, 1706).alleles("CCG","ACC").source("merged").make(); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); logger.warn(truthVC + " == " + mergedVC); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); - Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel()); - Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel()); // insertion + deletion - thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","AAAAA").referenceBaseForIndel("T").make(); - nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make(); + thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TAAAAA").make(); + nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make(); truthVC = new VariantContextBuilder().loc("2", 1703, 1706).alleles("TCCG","TAAAAACC").source("merged").make(); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); logger.warn(truthVC + " == " + mergedVC); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); - Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel()); - Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel()); // insertion + insertion - thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("-","A").referenceBaseForIndel("T").make(); - nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("-","A").referenceBaseForIndel("C").make(); + thisVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("T","TA").make(); + nextVC = new VariantContextBuilder().loc("2", 1705, 1705).alleles("C","CA").make(); truthVC = new VariantContextBuilder().loc("2", 1703, 1705).alleles("TCC","TACCA").source("merged").make(); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); logger.warn(truthVC + " == " + mergedVC); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); - Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel()); - Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel()); // deletion + deletion - thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("T","-").referenceBaseForIndel("A").make(); - nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("G","-").referenceBaseForIndel("C").make(); + thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make(); + nextVC = new VariantContextBuilder().loc("2", 1705, 1706).alleles("CG","C").make(); truthVC = new VariantContextBuilder().loc("2", 1701, 1706).alleles("ATTCCG","ATCC").source("merged").make(); mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); logger.warn(truthVC + " == " + mergedVC); Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); - Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel()); - Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel()); // complex + complex thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","AAA").make(); @@ -382,8 +362,6 @@ public class GenotypingEngineUnitTest extends BaseTest { Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); - Assert.assertEquals(truthVC.hasReferenceBaseForIndel(), mergedVC.hasReferenceBaseForIndel()); - Assert.assertEquals(truthVC.getReferenceBaseForIndel(), mergedVC.getReferenceBaseForIndel()); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index fe069c2d9..dd1eea8a4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -163,43 +163,45 @@ public class VariantContextAdaptors { @Override public VariantContext convert(String name, Object input, ReferenceContext ref) { OldDbSNPFeature dbsnp = (OldDbSNPFeature)input; - if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) ) + + int index = dbsnp.getStart() - ref.getWindow().getStart() - 1; + if ( index < 0 ) + return null; // we weren't given enough reference context to create the VariantContext + + final byte refBaseForIndel = ref.getBases()[index]; + + Allele refAllele; + if ( dbsnp.getNCBIRefBase().equals("-") ) + refAllele = Allele.create(refBaseForIndel); + else if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) ) return null; - Allele refAllele = Allele.create(dbsnp.getNCBIRefBase(), true); + else + refAllele = Allele.create(refBaseForIndel + dbsnp.getNCBIRefBase(), true); - if ( isSNP(dbsnp) || isIndel(dbsnp) || isMNP(dbsnp) || dbsnp.getVariantType().contains("mixed") ) { - // add the reference allele - List alleles = new ArrayList(); - alleles.add(refAllele); - - // add all of the alt alleles - boolean sawNullAllele = refAllele.isNull(); - for ( String alt : getAlternateAlleleList(dbsnp) ) { - if ( ! Allele.acceptableAlleleBases(alt) ) { - //System.out.printf("Excluding dbsnp record %s%n", dbsnp); - return null; - } - Allele altAllele = Allele.create(alt, false); - alleles.add(altAllele); - if ( altAllele.isNull() ) - sawNullAllele = true; - } - - Map attributes = new HashMap(); - - int index = dbsnp.getStart() - ref.getWindow().getStart() - 1; - if ( index < 0 ) - return null; // we weren't given enough reference context to create the VariantContext - Byte refBaseForIndel = new Byte(ref.getBases()[index]); - - final VariantContextBuilder builder = new VariantContextBuilder(); - builder.source(name).id(dbsnp.getRsID()); - builder.loc(dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0), dbsnp.getEnd() - (refAllele.isNull() ? 1 : 0)); - builder.alleles(alleles); - builder.referenceBaseForIndel(refBaseForIndel); - return builder.make(); - } else + boolean addPaddingBase; + if ( isSNP(dbsnp) || isMNP(dbsnp) ) + addPaddingBase = false; + else if ( isIndel(dbsnp) || dbsnp.getVariantType().contains("mixed") ) + addPaddingBase = true; + else return null; // can't handle anything else + + final List alleles = new ArrayList(); + alleles.add(refAllele); + + // add all of the alt alleles + for ( String alt : getAlternateAlleleList(dbsnp) ) { + if ( ! Allele.acceptableAlleleBases(alt) ) { + return null; + } + alleles.add(Allele.create((addPaddingBase ? refBaseForIndel : "") + alt, false)); + } + + final VariantContextBuilder builder = new VariantContextBuilder(); + builder.source(name).id(dbsnp.getRsID()); + builder.loc(dbsnp.getChr(), dbsnp.getStart() - (addPaddingBase ? 1 : 0), dbsnp.getEnd() - (addPaddingBase && refAllele.length() == 1 ? 1 : 0)); + builder.alleles(alleles); + return builder.make(); } } @@ -351,7 +353,7 @@ public class VariantContextAdaptors { long end = hapmap.getEnd(); if ( deletionLength > 0 ) end += deletionLength; - VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).referenceBaseForIndel(refBaseForIndel).make(); + VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).make(); return vc; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index 523aa81b1..261f6433b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -42,10 +42,6 @@ import java.util.List; */ public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation { - private static final String REF_ALLELE = "REF"; - - private static final String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time - public void annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, GenotypeBuilder gb) { if ( g == null || !g.isCalled() ) return; @@ -53,10 +49,10 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa if ( vc.isSNP() ) annotateSNP(stratifiedContext, vc, gb); else if ( vc.isIndel() ) - annotateIndel(stratifiedContext, vc, gb); + annotateIndel(stratifiedContext, ref.getBase(), vc, gb); } - private void annotateSNP(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) { + private void annotateSNP(final AlignmentContext stratifiedContext, final VariantContext vc, final GenotypeBuilder gb) { HashMap alleleCounts = new HashMap(); for ( Allele allele : vc.getAlleles() ) @@ -77,62 +73,47 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa gb.AD(counts); } - private void annotateIndel(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) { + private void annotateIndel(final AlignmentContext stratifiedContext, final byte refBase, final VariantContext vc, final GenotypeBuilder gb) { ReadBackedPileup pileup = stratifiedContext.getBasePileup(); if ( pileup == null ) return; - final HashMap alleleCounts = new HashMap(); - alleleCounts.put(REF_ALLELE, 0); + final HashMap alleleCounts = new HashMap(); final Allele refAllele = vc.getReference(); - for ( Allele allele : vc.getAlternateAlleles() ) { - - if ( allele.isNoCall() ) { - continue; // this does not look so good, should we die??? - } - - alleleCounts.put(getAlleleRepresentation(allele), 0); + for ( final Allele allele : vc.getAlleles() ) { + alleleCounts.put(allele, 0); } for ( PileupElement p : pileup ) { if ( p.isBeforeInsertion() ) { - final String b = p.getEventBases(); - if ( alleleCounts.containsKey(b) ) { - alleleCounts.put(b, alleleCounts.get(b)+1); + final Allele insertion = Allele.create(refBase + p.getEventBases(), false); + if ( alleleCounts.containsKey(insertion) ) { + alleleCounts.put(insertion, alleleCounts.get(insertion)+1); } } else if ( p.isBeforeDeletionStart() ) { - if ( p.getEventLength() == refAllele.length() ) { - // this is indeed the deletion allele recorded in VC - final String b = DEL; - if ( alleleCounts.containsKey(b) ) { - alleleCounts.put(b, alleleCounts.get(b)+1); - } + if ( p.getEventLength() == refAllele.length() + 1 ) { + // this is indeed the deletion allele recorded in VC + final Allele deletion = Allele.create(refBase); + if ( alleleCounts.containsKey(deletion) ) { + alleleCounts.put(deletion, alleleCounts.get(deletion)+1); } + } } else if ( p.getRead().getAlignmentEnd() > vc.getStart() ) { - alleleCounts.put(REF_ALLELE, alleleCounts.get(REF_ALLELE)+1); + alleleCounts.put(refAllele, alleleCounts.get(refAllele)+1); } } - int[] counts = new int[alleleCounts.size()]; - counts[0] = alleleCounts.get(REF_ALLELE); + final int[] counts = new int[alleleCounts.size()]; + counts[0] = alleleCounts.get(refAllele); for (int i = 0; i < vc.getAlternateAlleles().size(); i++) - counts[i+1] = alleleCounts.get( getAlleleRepresentation(vc.getAlternateAllele(i)) ); + counts[i+1] = alleleCounts.get( vc.getAlternateAllele(i) ); gb.AD(counts); } - private String getAlleleRepresentation(Allele allele) { - if ( allele.isNull() ) { // deletion wrt the ref - return DEL; - } else { // insertion, pass actual bases - return allele.getBaseString(); - } - - } - // public String getIndelBases() public List getKeyNames() { return Arrays.asList(VCFConstants.GENOTYPE_ALLELE_DEPTHS); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java index 627d561f6..c8abbfa5a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java @@ -247,8 +247,6 @@ public class BeagleOutputToVCF extends RodWalker { // Beagle always produces genotype strings based on the strings we input in the likelihood file. String refString = vc_input.getReference().getDisplayString(); - if (refString.length() == 0) // ref was null - refString = Allele.NULL_ALLELE_STRING; Allele bglAlleleA, bglAlleleB; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java index 14e92a066..470a1d477 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java @@ -236,7 +236,7 @@ public class ProduceBeagleInput extends RodWalker { if ( markers != null ) markers.append(marker).append("\t").append(Integer.toString(markerCounter++)).append("\t"); for ( Allele allele : preferredVC.getAlleles() ) { String bglPrintString; - if (allele.isNoCall() || allele.isNull()) + if (allele.isNoCall()) bglPrintString = "-"; else bglPrintString = allele.getBaseString(); // get rid of * in case of reference allele diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphased.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphased.java index 6d83a1d2a..f338f0124 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphased.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphased.java @@ -146,7 +146,7 @@ public class VariantsToBeagleUnphased extends RodWalker { // write out the alleles at this site for ( Allele allele : vc.getAlleles() ) { - beagleOut.append(allele.isNoCall() || allele.isNull() ? "-" : allele.getBaseString()).append(" "); + beagleOut.append(allele.isNoCall() ? "-" : allele.getBaseString()).append(" "); } // write out sample level genotypes diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java index cef09a913..d2071a9fb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java @@ -246,18 +246,19 @@ public class ConsensusAlleleCounter { // get ref bases of accurate deletion final int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart(); stop = loc.getStart() + dLen; - final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen); + final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference - 1, startIdxInReference + dLen); // add reference padding if (Allele.acceptableAlleleBases(refBases, false)) { refAllele = Allele.create(refBases, true); - altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false); + altAllele = Allele.create(ref.getBase(), false); } else continue; // don't go on with this allele if refBases are non-standard } else { // insertion case - if (Allele.acceptableAlleleBases(s, false)) { // don't allow N's in insertions - refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true); - altAllele = Allele.create(s, false); + final String insertionBases = ref.getBase() + s; // add reference padding + if (Allele.acceptableAlleleBases(insertionBases, false)) { // don't allow N's in insertions + refAllele = Allele.create(ref.getBase(), true); + altAllele = Allele.create(insertionBases, false); stop = loc.getStart(); } else continue; // go on to next allele if consensus insertion has any non-standard base. @@ -267,7 +268,6 @@ public class ConsensusAlleleCounter { final VariantContextBuilder builder = new VariantContextBuilder().source(""); builder.loc(loc.getContig(), loc.getStart(), stop); builder.alleles(Arrays.asList(refAllele, altAllele)); - builder.referenceBaseForIndel(ref.getBase()); builder.noGenotypes(); if (doMultiAllelicCalls) { vcs.add(builder.make()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 230d6c324..7eabe7a18 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -123,7 +123,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood final int endLoc = computeEndLocation(alleleList, loc,allelesArePadded); final int eventLength = getEventLength(alleleList); - final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList).referenceBaseForIndel(ref.getBase()); + final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList); // create the genotypes; no-call everyone for now GenotypesContext genotypes = GenotypesContext.create(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 32564984a..d4c45e19d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -37,7 +37,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.codecs.vcf.VCFAlleleClipper; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -283,7 +282,7 @@ public class UnifiedGenotyperEngine { VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, rawContext.getLocation(), false, logger, UAC.alleles); if ( vcInput == null ) return null; - vc = new VariantContextBuilder("UG_call", ref.getLocus().getContig(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles()).referenceBaseForIndel(vcInput.getReferenceBaseForIndel()).make(); + vc = new VariantContextBuilder("UG_call", ref.getLocus().getContig(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles()).make(); } else { // deal with bad/non-standard reference bases if ( !Allele.acceptableAlleleBases(new byte[]{ref.getBase()}) ) @@ -408,11 +407,6 @@ public class UnifiedGenotyperEngine { builder.log10PError(phredScaledConfidence/-10.0); if ( ! passesCallThreshold(phredScaledConfidence) ) builder.filters(filter); - if ( limitedContext ) { - builder.referenceBaseForIndel(vc.getReferenceBaseForIndel()); - } else { - builder.referenceBaseForIndel(refContext.getBase()); - } // create the genotypes final GenotypesContext genotypes = afcm.get().subsetAlleles(vc, myAlleles, true,ploidy); @@ -491,10 +485,8 @@ public class UnifiedGenotyperEngine { builder.attributes(attributes); VariantContext vcCall = builder.make(); - // if we are subsetting alleles (either because there were too many or because some were not polymorphic) - // then we may need to trim the alleles (because the original VariantContext may have had to pad at the end). - if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // TODO - this function doesn't work with mixed records or records that started as mixed and then became non-mixed - vcCall = VCFAlleleClipper.reverseTrimAlleles(vcCall); + // TODO -- if we are subsetting alleles (either because there were too many or because some were not polymorphic) + // TODO -- then we may need to trim the alleles (because the original VariantContext may have had to pad at the end). if ( annotationEngine != null && !limitedContext ) { // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java index 21db1412b..0c7e2ec5f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java @@ -1128,12 +1128,13 @@ public class SomaticIndelDetector extends ReadWalker { List alleles = new ArrayList(2); // actual observed (distinct!) alleles at the site List homref_alleles = null; // when needed, will contain two identical copies of ref allele - needed to generate hom-ref genotype + final byte referencePaddingBase = refBases[(int)start-1]; if ( call.getVariant() == null ) { - // we will need to cteate genotype with two (hom) ref alleles (below). + // we will need to create genotype with two (hom) ref alleles (below). // we can not use 'alleles' list here, since that list is supposed to contain // only *distinct* alleles observed at the site or VCFContext will frown upon us... - alleles.add( Allele.create(refBases[(int)start-1],true) ); + alleles.add( Allele.create(referencePaddingBase,true) ); homref_alleles = new ArrayList(2); homref_alleles.add( alleles.get(0)); homref_alleles.add( alleles.get(0)); @@ -1142,7 +1143,7 @@ public class SomaticIndelDetector extends ReadWalker { // (Genotype will tell us whether it is an actual call or not!) int event_length = call.getVariant().lengthOnRef(); if ( event_length < 0 ) event_length = 0; - fillAlleleList(alleles,call); + fillAlleleList(alleles,call,referencePaddingBase); stop += event_length; } @@ -1162,7 +1163,7 @@ public class SomaticIndelDetector extends ReadWalker { filters.add("NoCall"); } VariantContext vc = new VariantContextBuilder("IGv2_Indel_call", refName, start, stop, alleles) - .genotypes(genotypes).filters(filters).referenceBaseForIndel(refBases[(int)start-1]).make(); + .genotypes(genotypes).filters(filters).make(); vcf.add(vc); } @@ -1172,16 +1173,16 @@ public class SomaticIndelDetector extends ReadWalker { * @param l * @param call */ - private void fillAlleleList(List l, IndelPrecall call) { + private void fillAlleleList(List l, IndelPrecall call, byte referencePaddingBase) { int event_length = call.getVariant().lengthOnRef(); if ( event_length == 0 ) { // insertion - l.add( Allele.create(Allele.NULL_ALLELE_STRING,true) ); - l.add( Allele.create(call.getVariant().getBases(), false )); + l.add( Allele.create(referencePaddingBase,true) ); + l.add( Allele.create(referencePaddingBase + call.getVariant().getBases(), false )); } else { //deletion: - l.add( Allele.create(call.getVariant().getBases(), true )); - l.add( Allele.create(Allele.NULL_ALLELE_STRING,false) ); + l.add( Allele.create(referencePaddingBase + call.getVariant().getBases(), true )); + l.add( Allele.create(referencePaddingBase,false) ); } } @@ -1215,19 +1216,20 @@ public class SomaticIndelDetector extends ReadWalker { // } boolean homRefT = ( tCall.getVariant() == null ); boolean homRefN = ( nCall.getVariant() == null ); + final byte referencePaddingBase = refBases[(int)start-1]; if ( tCall.getVariant() == null && nCall.getVariant() == null) { // no indel at all ; create base-representation ref/ref alleles for genotype construction - alleles.add( Allele.create(refBases[(int)start-1],true) ); + alleles.add( Allele.create(referencePaddingBase,true) ); } else { // we got indel(s) int event_length = 0; if ( tCall.getVariant() != null ) { // indel in tumor event_length = tCall.getVariant().lengthOnRef(); - fillAlleleList(alleles, tCall); + fillAlleleList(alleles, tCall, referencePaddingBase); } else { event_length = nCall.getVariant().lengthOnRef(); - fillAlleleList(alleles, nCall); + fillAlleleList(alleles, nCall, referencePaddingBase); } if ( event_length > 0 ) stop += event_length; } @@ -1259,7 +1261,7 @@ public class SomaticIndelDetector extends ReadWalker { } VariantContext vc = new VariantContextBuilder("IGv2_Indel_call", refName, start, stop, alleles) - .genotypes(genotypes).filters(filters).attributes(attrs).referenceBaseForIndel(refBases[(int)start-1]).make(); + .genotypes(genotypes).filters(filters).attributes(attrs).make(); vcf.add(vc); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GenomeEvent.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GenomeEvent.java index af6a52002..67ddc47ff 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GenomeEvent.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GenomeEvent.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -40,14 +39,11 @@ public class GenomeEvent implements Comparable { final protected GenomeLoc loc; /** A set of the alleles segregating in this context */ final protected List alleles; - final protected Byte refBase; // final protected HashMap attributes; - public GenomeEvent(GenomeLocParser parser, final String contig, final int start, final int stop, final List alleles, HashMap attributes, - byte base) { + public GenomeEvent(GenomeLocParser parser, final String contig, final int start, final int stop, final List alleles, HashMap attributes) { this.loc = parser.createGenomeLoc(contig, start, stop); this.alleles = alleles; - this.refBase = base; // this.attributes = attributes; } @@ -68,7 +64,7 @@ public class GenomeEvent implements Comparable { public VariantContext createVariantContextFromEvent() { return new VariantContextBuilder("event", loc.getContig(), loc.getStart(), loc.getStop(), alleles) - .log10PError(0.0).referenceBaseForIndel(refBase).make(); + .log10PError(0.0).make(); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/KeepAFSpectrumFrequencySelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/KeepAFSpectrumFrequencySelector.java index 4b68eed2e..7c1d63f02 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/KeepAFSpectrumFrequencySelector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/KeepAFSpectrumFrequencySelector.java @@ -115,7 +115,7 @@ public class KeepAFSpectrumFrequencySelector extends FrequencyModeSelector { // create bare-bones event and log in corresponding bin // attributes contains AC,AF,AN pulled from original vc, and we keep them here and log in output file for bookkeeping purposes - GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes, vc.getReferenceBaseForIndel()); + GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes); binnedEventArray[binIndex].add(event); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/UniformSamplingFrequencySelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/UniformSamplingFrequencySelector.java index eda75d647..4019c5631 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/UniformSamplingFrequencySelector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/UniformSamplingFrequencySelector.java @@ -65,7 +65,7 @@ public class UniformSamplingFrequencySelector extends FrequencyModeSelector { } // create bare-bones event and log in corresponding bin // attributes contains AC,AF,AN pulled from original vc, and we keep them here and log in output file for bookkeeping purposes - GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes, vc.getReferenceBaseForIndel()); + GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes); binnedEventArray.add(event); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java index 88bf3aef9..a509294ff 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java @@ -56,7 +56,7 @@ public class ThetaVariantEvaluator extends VariantEvaluator { //increment stats for pairwise mismatches for (Allele allele : genotype.getAlleles()) { - if (allele.isNonNull() && allele.isCalled()) { + if (allele.isCalled()) { String alleleString = allele.toString(); alleleCounts.putIfAbsent(alleleString, 0); alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java index c1755aa00..3f19e22d9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java @@ -226,6 +226,6 @@ public class LeftAlignVariants extends RodWalker { newGenotypes.add(new GenotypeBuilder(genotype).alleles(newAlleles).make()); } - return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).referenceBaseForIndel(refBaseForIndel).make(); + return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).make(); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index 60d41abd5..094897edc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -116,7 +116,6 @@ public class LiftoverVariants extends RodWalker { if ( toInterval != null ) { // check whether the strand flips, and if so reverse complement everything - // TODO -- make this work for indels (difficult because the 'previous base' context needed will be changing based on indel type/size) if ( fromInterval.isPositiveStrand() != toInterval.isPositiveStrand() && vc.isPointEvent() ) { vc = VariantContextUtils.reverseComplement(vc); } @@ -129,11 +128,10 @@ public class LiftoverVariants extends RodWalker { .attribute("OriginalStart", fromInterval.getStart()).make(); } - VariantContext newVC = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc); - if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) { + if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(vc) ) { logger.warn(String.format("VCF at %s / %d => %s / %d is switching substitution type %s/%s to %s/%s", - originalVC.getChr(), originalVC.getStart(), newVC.getChr(), newVC.getStart(), - originalVC.getReference(), originalVC.getAlternateAllele(0), newVC.getReference(), newVC.getAlternateAllele(0))); + originalVC.getChr(), originalVC.getStart(), vc.getChr(), vc.getStart(), + originalVC.getReference(), originalVC.getAlternateAllele(0), vc.getReference(), vc.getAlternateAllele(0))); } writer.add(vc); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index 530258fe0..995e98931 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -127,35 +127,16 @@ public class ValidateVariants extends RodWalker { return; // get the true reference allele - Allele reportedRefAllele = vc.getReference(); - Allele observedRefAllele = null; - // insertions - if ( vc.isSimpleInsertion() ) { - observedRefAllele = Allele.create(Allele.NULL_ALLELE_STRING); + final Allele reportedRefAllele = vc.getReference(); + final int refLength = reportedRefAllele.length(); + if ( refLength > 100 ) { + logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", refLength, vc.getChr(), vc.getStart())); + return; } - // deletions - else if ( vc.isSimpleDeletion() || vc.isMNP() ) { - // we can't validate arbitrarily long deletions - if ( reportedRefAllele.length() > 100 ) { - logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", reportedRefAllele.length(), vc.getChr(), vc.getStart())); - return; - } - // deletions are associated with the (position of) the last (preceding) non-deleted base; - // hence to get actually deleted bases we need offset = 1 - int offset = vc.isMNP() ? 0 : 1; - byte[] refBytes = ref.getBases(); - byte[] trueRef = new byte[reportedRefAllele.length()]; - for (int i = 0; i < reportedRefAllele.length(); i++) - trueRef[i] = refBytes[i+offset]; - observedRefAllele = Allele.create(trueRef, true); - } - // SNPs, etc. but not mixed types because they are too difficult - else if ( !vc.isMixed() ) { - byte[] refByte = new byte[1]; - refByte[0] = ref.getBase(); - observedRefAllele = Allele.create(refByte, true); - } + final byte[] observedRefBases = new byte[refLength]; + System.arraycopy(ref.getBases(), 0, observedRefBases, 0, refLength); + final Allele observedRefAllele = Allele.create(observedRefBases); // get the RS IDs Set rsIDs = null; @@ -168,10 +149,10 @@ public class ValidateVariants extends RodWalker { try { switch( type ) { case ALL: - vc.extraStrictValidation(observedRefAllele, ref.getBase(), rsIDs); + vc.extraStrictValidation(reportedRefAllele, observedRefAllele, rsIDs); break; case REF: - vc.validateReferenceBases(observedRefAllele, ref.getBase()); + vc.validateReferenceBases(reportedRefAllele, observedRefAllele); break; case IDS: vc.validateRSIDs(rsIDs); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index 996ac75e7..4806b2ebc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -378,7 +378,7 @@ public class VariantsToTable extends RodWalker { getters.put("REF", new Getter() { public String get(VariantContext vc) { StringBuilder x = new StringBuilder(); - x.append(vc.getAlleleStringWithRefPadding(vc.getReference())); + x.append(vc.getReference()); return x.toString(); } }); @@ -390,7 +390,7 @@ public class VariantsToTable extends RodWalker { for ( int i = 0; i < n; i++ ) { if ( i != 0 ) x.append(","); - x.append(vc.getAlleleStringWithRefPadding(vc.getAlternateAllele(i))); + x.append(vc.getAlternateAllele(i)); } return x.toString(); } @@ -432,11 +432,8 @@ public class VariantsToTable extends RodWalker { private static Object splitAltAlleles(VariantContext vc) { final int numAltAlleles = vc.getAlternateAlleles().size(); if ( numAltAlleles == 1 ) - return vc.getAlleleStringWithRefPadding(vc.getAlternateAllele(0)); + return vc.getAlternateAllele(0); - final List alleles = new ArrayList(numAltAlleles); - for ( Allele allele : vc.getAlternateAlleles() ) - alleles.add(vc.getAlleleStringWithRefPadding(allele)); - return alleles; + return vc.getAlternateAlleles(); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index e8c6794f2..cf568a62e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -100,12 +100,6 @@ public class VariantsToVCF extends RodWalker { @Argument(fullName="sample", shortName="sample", doc="The sample name represented by the variant rod", required=false) protected String sampleName = null; - /** - * This argument is useful for fixing input VCFs with bad reference bases (the output will be a fixed version of the VCF). - */ - @Argument(fullName="fixRef", shortName="fixRef", doc="Fix common reference base in case there's an indel without padding", required=false) - protected boolean fixReferenceBase = false; - private Set allowedGenotypeFormatStrings = new HashSet(); private boolean wroteHeader = false; private Set samples; @@ -137,10 +131,6 @@ public class VariantsToVCF extends RodWalker { builder.genotypes(g); } - if ( fixReferenceBase ) { - builder.referenceBaseForIndel(ref.getBase()); - } - writeRecord(builder.make(), tracker, ref.getLocus()); } @@ -166,8 +156,8 @@ public class VariantsToVCF extends RodWalker { continue; Map alleleMap = new HashMap(2); - alleleMap.put(RawHapMapFeature.DELETION, Allele.create(Allele.NULL_ALLELE_STRING, dbsnpVC.isSimpleInsertion())); - alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion())); + alleleMap.put(RawHapMapFeature.DELETION, Allele.create(ref.getBase(), dbsnpVC.isSimpleInsertion())); + alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(ref.getBase() + ((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion())); hapmap.setActualAlleles(alleleMap); // also, use the correct positioning for insertions diff --git a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java index 393dd5735..0065f9258 100644 --- a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java @@ -431,6 +431,37 @@ public class BaseUtils { return new String(simpleComplement(bases.getBytes())); } + /** + * Returns the uppercased version of the bases + * + * @param bases the bases + * @return the upper cased version + */ + static public byte[] convertToUpperCase(final byte[] bases) { + for ( int i = 0; i < bases.length; i++ ) { + if ( (char)bases[i] >= 'a' ) + bases[i] = toUpperCaseBase(bases[i]); + } + return bases; + } + + static public byte toUpperCaseBase(final byte base) { + switch (base) { + case 'a': + return 'A'; + case 'c': + return 'C'; + case 'g': + return 'G'; + case 't': + return 'T'; + case 'n': + return 'N'; + default: + return base; + } + } + /** * Returns the index of the most common base in the basecounts array. To be used with * pileup.getBaseCounts. diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java index 0b9654610..0f9cc34e7 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java @@ -305,27 +305,6 @@ public final class BCF2Codec implements FeatureCodec { builder.id(id); } - /** - * Annoying routine that deals with allele clipping from the BCF2 encoding to the standard - * GATK encoding. - * - * @param position - * @param ref - * @param unclippedAlleles - * @return - */ - @Requires({"position > 0", "ref != null && ref.length() > 0", "! unclippedAlleles.isEmpty()"}) - @Ensures("result.size() == unclippedAlleles.size()") - protected List clipAllelesIfNecessary(final int position, - final String ref, - final List unclippedAlleles) { - // the last argument of 1 allows us to safely ignore the end, because we are - // ultimately going to use the end in the record itself - final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(position, ref, unclippedAlleles, 1); - if ( clipped.getError() != null ) error(clipped.getError()); - return clipped.getClippedAlleles(); - } - /** * Decode the alleles from this BCF2 file and put the results in builder * @param builder @@ -353,11 +332,9 @@ public final class BCF2Codec implements FeatureCodec { } assert ref != null; - alleles = clipAllelesIfNecessary(pos, ref, alleles); builder.alleles(alleles); assert ref.length() > 0; - builder.referenceBaseForIndel(ref.getBytes()[0]); return alleles; } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index b3420514b..2b5695e3a 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -248,6 +248,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec builder.id(parts[2]); final String ref = getCachedString(parts[3].toUpperCase()); + builder.stop(pos + ref.length() - 1); final String alts = getCachedString(parts[4].toUpperCase()); builder.log10PError(parseQual(parts[5])); @@ -257,8 +258,8 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec builder.attributes(attrs); // get our alleles, filters, and setup an attribute map - final List rawAlleles = parseAlleles(ref, alts, lineNo); - final List alleles = updateBuilderAllelesAndStop(builder, ref, pos, rawAlleles, attrs); + final List alleles = parseAlleles(ref, alts, lineNo); + builder.alleles(alleles); // do we have genotyping data if (parts.length > NUM_STANDARD_FIELDS && includeGenotypes) { @@ -275,7 +276,6 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec VariantContext vc = null; try { - builder.referenceBaseForIndel(ref.getBytes()[0]); vc = builder.make(); } catch (Exception e) { generateException(e.getMessage()); @@ -284,31 +284,6 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec return vc; } - private final List updateBuilderAllelesAndStop(final VariantContextBuilder builder, - final String ref, - final int pos, - final List rawAlleles, - final Map attrs) { - int endForSymbolicAlleles = pos; // by default we use the pos - if ( attrs.containsKey(VCFConstants.END_KEY) ) { - // update stop with the end key if provided - try { - endForSymbolicAlleles = Integer.valueOf(attrs.get(VCFConstants.END_KEY).toString()); - } catch (Exception e) { - generateException("the END value in the INFO field is not valid"); - } - } - - // find out our current location, and clip the alleles down to their minimum length - final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(pos, ref, rawAlleles, endForSymbolicAlleles); - if ( clipped.getError() != null ) - generateException(clipped.getError(), lineNo); - - builder.stop(clipped.getStop()); - builder.alleles(clipped.getClippedAlleles()); - return clipped.getClippedAlleles(); - } - /** * get the name of this codec * @return our set name diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipper.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipper.java deleted file mode 100644 index 40ba23d9d..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipper.java +++ /dev/null @@ -1,434 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.codecs.vcf; - -import com.google.java.contract.Ensures; -import com.google.java.contract.Invariant; -import com.google.java.contract.Requires; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.variantcontext.*; - -import java.util.*; - -/** - * All of the gross allele clipping and padding routines in one place - * - * Having attempted to understand / fix / document this code myself - * I can only conclude that this entire approach needs to be rethought. This - * code just doesn't work robustly with symbolic alleles, with multiple alleles, - * requires a special "reference base for indels" stored in the VariantContext - * whose correctness isn't enforced, and overall has strange special cases - * all over the place. - * - * The reason this code is so complex is due to symbolics and multi-alleleic - * variation, which frequently occur when combining variants from multiple - * VCF files. - * - * TODO rethink this class, make it clean, and make it easy to create, mix, and write out alleles - * TODO this code doesn't work with reverse clipped alleles (ATA / GTTA -> AT / GT) - * - * @author Mark DePristo - * @since 6/12 - */ -public final class VCFAlleleClipper { - private VCFAlleleClipper() { } - - /** - * Determine whether we should clip off the first base of all unclippped alleles or not - * - * Returns true if all of the alleles in unclippedAlleles share a common first base with - * ref0. Ref0 should be the first base of the reference allele UnclippedAlleles may - * contain the reference allele itself, or just the alternate alleles, it doesn't matter. - * - * The algorithm returns true if the first base should be clipped off, or false otherwise - * - * This algorithm works even in the presence of symbolic alleles, logically ignoring these - * values. It - * - * @param unclippedAlleles list of unclipped alleles to assay - * @param ref0 the first base of the reference allele - * @return true if we should clip the first base of unclippedAlleles - */ - @Requires("unclippedAlleles != null") - public static boolean shouldClipFirstBaseP(final List unclippedAlleles, - final byte ref0) { - boolean allSymbolicAlt = true; - - for ( final Allele a : unclippedAlleles ) { - if ( a.isSymbolic() ) { - continue; - } - - // already know we aren't symbolic, so we only need to decide if we have only seen a ref - if ( ! a.isReference() ) - allSymbolicAlt = false; - - if ( a.length() < 1 || (a.getBases()[0] != ref0) ) { - return false; - } - } - - // to reach here all alleles are consistent with clipping the first base matching ref0 - // but we don't clip if all ALT alleles are symbolic - return ! allSymbolicAlt; - } - - public static int computeReverseClipping(final List unclippedAlleles, - final byte[] ref, - final int forwardClipping, - final boolean allowFullClip) { - int clipping = 0; - boolean stillClipping = true; - - while ( stillClipping ) { - for ( final Allele a : unclippedAlleles ) { - if ( a.isSymbolic() ) - continue; - - // we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong - // position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine). - if ( a.length() - clipping == 0 ) - return clipping - (allowFullClip ? 0 : 1); - - if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) { - stillClipping = false; - } - else if ( ref.length == clipping ) { - if ( allowFullClip ) - stillClipping = false; - else - return -1; - } - else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) { - stillClipping = false; - } - } - if ( stillClipping ) - clipping++; - } - - return clipping; - } - - /** - * Are the alleles describing a polymorphism substitution one base for another? - * - * @param alleles a list of alleles, must not be empty - * @return Return true if the length of any allele in alleles isn't 1 - */ - @Requires("!alleles.isEmpty()") - private static boolean isSingleNucleotideEvent(final List alleles) { - for ( final Allele a : alleles ) { - if ( a.length() != 1 ) - return false; - } - return true; - } - - /** - * clip the alleles, based on the reference, returning a ClippedAlleles object describing what happened - * - * The ClippedAlleles object contains the implied stop position of the alleles, given the provided start - * position, after clipping. It also contains the list of alleles, in the same order as the provided - * unclipped ones, that are the fully clipped version of the input alleles. If an error occurs - * during this option the getError() function returns a string describing the problem (for use in parsers). - * - * The basic operation are: - * - * single allele - * => stop == start and clipped == unclipped - * any number of single nucleotide events - * => stop == start and clipped == unclipped - * two alleles, second being symbolic - * => stop == start and clipped == unclipped - * Note in this case that the STOP should be computed by other means (from END in VCF, for example) - * Note that if there's more than two alleles and the second is a symbolic the code produces an error - * Any other case: - * The alleles are trimmed of any sequence shared at the end of the alleles. If N bases - * are common then the alleles will all be at least N bases shorter. - * The stop position returned is the start position + the length of the - * reverse trimmed only reference allele - 1. - * If the alleles all share a single common starting sequence (just one base is considered) - * then the alleles have this leading common base removed as well. - * - * TODO This code is gross and brittle and needs to be rethought from scratch - * - * @param start the unadjusted start position (pre-clipping) - * @param ref the reference string - * @param unclippedAlleles the list of unclipped alleles, including the reference allele - * @return the new reference end position of this event - */ - @Requires({"start > 0", "ref != null && ref.length() > 0", "!unclippedAlleles.isEmpty()"}) - @Ensures("result != null") - public static ClippedAlleles clipAlleles(final int start, - final String ref, - final List unclippedAlleles, - final int endForSymbolicAllele ) { - // no variation or single nucleotide events are by definition fully clipped - if ( unclippedAlleles.size() == 1 || isSingleNucleotideEvent(unclippedAlleles) ) - return new ClippedAlleles(start, unclippedAlleles, null); - - // we've got to sort out the clipping by looking at the alleles themselves - final byte firstRefBase = (byte) ref.charAt(0); - final boolean firstBaseIsClipped = shouldClipFirstBaseP(unclippedAlleles, firstRefBase); - final int forwardClipping = firstBaseIsClipped ? 1 : 0; - final int reverseClipping = computeReverseClipping(unclippedAlleles, ref.getBytes(), forwardClipping, false); - final boolean needsClipping = forwardClipping > 0 || reverseClipping > 0; - - if ( reverseClipping == -1 ) - return new ClippedAlleles("computeReverseClipping failed due to bad alleles"); - - boolean sawSymbolic = false; - List clippedAlleles; - if ( ! needsClipping ) { - // there's nothing to clip, so clippedAlleles are the original alleles - clippedAlleles = unclippedAlleles; - } else { - clippedAlleles = new ArrayList(unclippedAlleles.size()); - for ( final Allele a : unclippedAlleles ) { - if ( a.isSymbolic() ) { - sawSymbolic = true; - clippedAlleles.add(a); - } else { - final byte[] allele = Arrays.copyOfRange(a.getBases(), forwardClipping, a.getBases().length - reverseClipping); - if ( !Allele.acceptableAlleleBases(allele) ) - return new ClippedAlleles("Unparsable vcf record with bad allele [" + allele + "]"); - clippedAlleles.add(Allele.create(allele, a.isReference())); - } - } - } - - int stop = VariantContextUtils.computeEndFromAlleles(clippedAlleles, start, endForSymbolicAllele); - - // TODO - // TODO - // TODO COMPLETELY BROKEN CODE -- THE GATK CURRENTLY ENCODES THE STOP POSITION FOR CLIPPED ALLELES AS + 1 - // TODO ITS TRUE SIZE TO DIFFERENTIATE CLIPPED VS. UNCLIPPED ALLELES. NEEDS TO BE FIXED - // TODO - // TODO - if ( needsClipping && ! sawSymbolic && ! clippedAlleles.get(0).isNull() ) stop++; - // TODO - // TODO - // TODO COMPLETELY BROKEN CODE -- THE GATK CURRENTLY ENCODES THE STOP POSITION FOR CLIPPED ALLELES AS + 1 - // TODO ITS TRUE SIZE TO DIFFERENTIATE CLIPPED VS. UNCLIPPED ALLELES. NEEDS TO BE FIXED - // TODO - // TODO - - final Byte refBaseForIndel = firstBaseIsClipped ? firstRefBase : null; - return new ClippedAlleles(stop, clippedAlleles, refBaseForIndel); - } - - /** - * Returns true if the alleles in inputVC should have reference bases added for padding - * - * We need to pad a VC with a common base if the length of the reference allele is - * less than the length of the VariantContext. This happens because the position of - * e.g. an indel is always one before the actual event (as per VCF convention). - * - * @param inputVC the VC to evaluate, cannot be null - * @return true if - */ - public static boolean needsPadding(final VariantContext inputVC) { - // biallelic sites with only symbolic never need padding - if ( inputVC.isBiallelic() && inputVC.getAlternateAllele(0).isSymbolic() ) - return false; - - final int recordLength = inputVC.getEnd() - inputVC.getStart() + 1; - final int referenceLength = inputVC.getReference().length(); - - if ( referenceLength == recordLength ) - return false; - else if ( referenceLength == recordLength - 1 ) - return true; - else if ( !inputVC.hasSymbolicAlleles() ) - throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) + - " in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size"); - else if ( inputVC.isMixed() && inputVC.hasSymbolicAlleles() ) - throw new IllegalArgumentException("GATK infrastructure limitation prevents needsPadding from working properly with VariantContexts containing a mixture of symbolic and concrete alleles at " + inputVC); - return false; - } - - public static Allele padAllele(final VariantContext vc, final Allele allele) { - assert needsPadding(vc); - - if ( allele.isSymbolic() ) - return allele; - else { - // get bases for current allele and create a new one with trimmed bases - final StringBuilder sb = new StringBuilder(); - sb.append((char)vc.getReferenceBaseForIndel().byteValue()); - sb.append(allele.getDisplayString()); - final String newBases = sb.toString(); - return Allele.create(newBases, allele.isReference()); - } - } - - public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC) { - final boolean padVC = needsPadding(inputVC); - - // nothing to do if we don't need to pad bases - if ( padVC ) { - if ( !inputVC.hasReferenceBaseForIndel() ) - throw new ReviewedStingException("Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available."); - - final ArrayList alleles = new ArrayList(inputVC.getNAlleles()); - final Map unpaddedToPadded = inputVC.hasGenotypes() ? new HashMap(inputVC.getNAlleles()) : null; - - boolean paddedAtLeastOne = false; - for (final Allele a : inputVC.getAlleles()) { - final Allele padded = padAllele(inputVC, a); - paddedAtLeastOne = paddedAtLeastOne || padded != a; - alleles.add(padded); - if ( unpaddedToPadded != null ) unpaddedToPadded.put(a, padded); // conditional to avoid making unnecessary make - } - - if ( ! paddedAtLeastOne ) - throw new ReviewedStingException("VC was supposed to need padding but no allele was actually changed at location " + inputVC.getChr() + ":" + inputVC.getStart() + " with allele " + inputVC.getAlleles()); - - final VariantContextBuilder vcb = new VariantContextBuilder(inputVC); - vcb.alleles(alleles); - - // the position of the inputVC is one further, if it doesn't contain symbolic alleles - vcb.computeEndFromAlleles(alleles, inputVC.getStart(), inputVC.getEnd()); - - if ( inputVC.hasGenotypes() ) { - assert unpaddedToPadded != null; - - // now we can recreate new genotypes with trimmed alleles - final GenotypesContext genotypes = GenotypesContext.create(inputVC.getNSamples()); - for (final Genotype g : inputVC.getGenotypes() ) { - final List newGenotypeAlleles = new ArrayList(g.getAlleles().size()); - for (final Allele a : g.getAlleles()) { - newGenotypeAlleles.add( a.isCalled() ? unpaddedToPadded.get(a) : Allele.NO_CALL); - } - genotypes.add(new GenotypeBuilder(g).alleles(newGenotypeAlleles).make()); - } - vcb.genotypes(genotypes); - } - - return vcb.make(); - } - else - return inputVC; - - } - - public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) { - // see if we need to trim common reference base from all alleles - - final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, true); - if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 ) - return inputVC; - - final List alleles = new ArrayList(); - final GenotypesContext genotypes = GenotypesContext.create(); - final Map originalToTrimmedAlleleMap = new HashMap(); - - for (final Allele a : inputVC.getAlleles()) { - if (a.isSymbolic()) { - alleles.add(a); - originalToTrimmedAlleleMap.put(a, a); - } else { - // get bases for current allele and create a new one with trimmed bases - final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent); - final Allele trimmedAllele = Allele.create(newBases, a.isReference()); - alleles.add(trimmedAllele); - originalToTrimmedAlleleMap.put(a, trimmedAllele); - } - } - - // now we can recreate new genotypes with trimmed alleles - for ( final Genotype genotype : inputVC.getGenotypes() ) { - final List originalAlleles = genotype.getAlleles(); - final List trimmedAlleles = new ArrayList(); - for ( final Allele a : originalAlleles ) { - if ( a.isCalled() ) - trimmedAlleles.add(originalToTrimmedAlleleMap.get(a)); - else - trimmedAlleles.add(Allele.NO_CALL); - } - genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make()); - } - - return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() + (inputVC.isMixed() ? -1 : 0)).alleles(alleles).genotypes(genotypes).make(); - } - - @Invariant("stop != -1 || error != null") // we're either an error or a meaningful result but not both - public static class ClippedAlleles { - private final int stop; - private final List clippedAlleles; - private final Byte refBaseForIndel; - private final String error; - - @Requires({"stop > 0", "clippedAlleles != null"}) - private ClippedAlleles(final int stop, final List clippedAlleles, final Byte refBaseForIndel) { - this.stop = stop; - this.clippedAlleles = clippedAlleles; - this.error = null; - this.refBaseForIndel = refBaseForIndel; - } - - @Requires("error != null") - private ClippedAlleles(final String error) { - this.stop = -1; - this.clippedAlleles = null; - this.refBaseForIndel = null; - this.error = error; - } - - /** - * Get an error if it occurred - * @return the error message, or null if no error occurred - */ - public String getError() { - return error; - } - - /** - * Get the stop position to use after the clipping as been applied, given the - * provided position to clipAlleles - * @return - */ - public int getStop() { - return stop; - } - - /** - * Get the clipped alleles themselves - * @return the clipped alleles in the order of the input unclipped alleles - */ - public List getClippedAlleles() { - return clippedAlleles; - } - - /** - * Returns the reference base we should use for indels, or null if none is appropriate - * @return - */ - public Byte getRefBaseForIndel() { - return refBaseForIndel; - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java index 2e1770581..1947ef01e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.utils.variantcontext; -import java.util.ArrayList; +import org.broadinstitute.sting.utils.BaseUtils; + import java.util.Arrays; import java.util.Collection; -import java.util.List; /** * Immutable representation of an allele @@ -77,22 +77,19 @@ public class Allele implements Comparable { private static final byte[] EMPTY_ALLELE_BASES = new byte[0]; private boolean isRef = false; - private boolean isNull = false; private boolean isNoCall = false; private boolean isSymbolic = false; private byte[] bases = null; - public final static String NULL_ALLELE_STRING = "-"; public final static String NO_CALL_STRING = "."; /** A generic static NO_CALL allele for use */ // no public way to create an allele private Allele(byte[] bases, boolean isRef) { - // standardize our representation of null allele and bases + // null alleles are no longer allowed if ( wouldBeNullAllele(bases) ) { - bases = EMPTY_ALLELE_BASES; - isNull = true; + throw new IllegalArgumentException("Null alleles are not supported"); } else if ( wouldBeNoCallAllele(bases) ) { bases = EMPTY_ALLELE_BASES; isNoCall = true; @@ -101,8 +98,8 @@ public class Allele implements Comparable { isSymbolic = true; if ( isRef ) throw new IllegalArgumentException("Cannot tag a symbolic allele as the reference allele"); } -// else -// bases = new String(bases).toUpperCase().getBytes(); // todo -- slow performance + else + bases = BaseUtils.convertToUpperCase(bases); this.isRef = isRef; this.bases = bases; @@ -126,8 +123,6 @@ public class Allele implements Comparable { private final static Allele ALT_T = new Allele("T", false); private final static Allele REF_N = new Allele("N", true); private final static Allele ALT_N = new Allele("N", false); - private final static Allele REF_NULL = new Allele(NULL_ALLELE_STRING, true); - private final static Allele ALT_NULL = new Allele(NULL_ALLELE_STRING, false); public final static Allele NO_CALL = new Allele(NO_CALL_STRING, false); // --------------------------------------------------------------------------------------------------------- @@ -154,7 +149,6 @@ public class Allele implements Comparable { case '.': if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele"); return NO_CALL; - case '-': return isRef ? REF_NULL : ALT_NULL; case 'A': case 'a' : return isRef ? REF_A : ALT_A; case 'C': case 'c' : return isRef ? REF_C : ALT_C; case 'G': case 'g' : return isRef ? REF_G : ALT_G; @@ -179,7 +173,7 @@ public class Allele implements Comparable { public static Allele extend(Allele left, byte[] right) { if (left.isSymbolic()) throw new IllegalArgumentException("Cannot extend a symbolic allele"); - byte[] bases = null; + byte[] bases; if ( left.length() == 0 ) bases = right; else { @@ -242,7 +236,10 @@ public class Allele implements Comparable { } public static boolean acceptableAlleleBases(byte[] bases, boolean allowNsAsAcceptable) { - if ( wouldBeNullAllele(bases) || wouldBeNoCallAllele(bases) || wouldBeSymbolicAllele(bases) ) + if ( wouldBeNullAllele(bases) ) + return false; + + if ( wouldBeNoCallAllele(bases) || wouldBeSymbolicAllele(bases) ) return true; for (byte base : bases ) { @@ -299,11 +296,6 @@ public class Allele implements Comparable { // // --------------------------------------------------------------------------------------------------------- - //Returns true if this is the null allele - public boolean isNull() { return isNull; } - // Returns true if this is not the null allele - public boolean isNonNull() { return ! isNull(); } - // Returns true if this is the NO_CALL allele public boolean isNoCall() { return isNoCall; } // Returns true if this is not the NO_CALL allele @@ -319,7 +311,7 @@ public class Allele implements Comparable { // Returns a nice string representation of this object public String toString() { - return (isNull() ? NULL_ALLELE_STRING : ( isNoCall() ? NO_CALL_STRING : getDisplayString() )) + (isReference() ? "*" : ""); + return ( isNoCall() ? NO_CALL_STRING : getDisplayString() ) + (isReference() ? "*" : ""); } /** @@ -384,27 +376,27 @@ public class Allele implements Comparable { * @return true if this and other are equal */ public boolean equals(Allele other, boolean ignoreRefState) { - return this == other || (isRef == other.isRef || ignoreRefState) && isNull == other.isNull && isNoCall == other.isNoCall && (bases == other.bases || Arrays.equals(bases, other.bases)); + return this == other || (isRef == other.isRef || ignoreRefState) && isNoCall == other.isNoCall && (bases == other.bases || Arrays.equals(bases, other.bases)); } /** * @param test bases to test against * - * @return true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles + * @return true if this Allele contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles */ public boolean basesMatch(byte[] test) { return !isSymbolic && (bases == test || Arrays.equals(bases, test)); } /** * @param test bases to test against * - * @return true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles + * @return true if this Allele contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles */ public boolean basesMatch(String test) { return basesMatch(test.toUpperCase().getBytes()); } /** * @param test allele to test against * - * @return true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles + * @return true if this Allele contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles */ public boolean basesMatch(Allele test) { return basesMatch(test.getBases()); } @@ -421,10 +413,6 @@ public class Allele implements Comparable { // // --------------------------------------------------------------------------------------------------------- - public static Allele getMatchingAllele(Collection allAlleles, String alleleBases) { - return getMatchingAllele(allAlleles, alleleBases.getBytes()); - } - public static Allele getMatchingAllele(Collection allAlleles, byte[] alleleBases) { for ( Allele a : allAlleles ) { if ( a.basesMatch(alleleBases) ) { @@ -438,26 +426,6 @@ public class Allele implements Comparable { return null; // couldn't find anything } - public static List resolveAlleles(List possibleAlleles, List alleleStrings) { - List myAlleles = new ArrayList(alleleStrings.size()); - - for ( String alleleString : alleleStrings ) { - Allele allele = getMatchingAllele(possibleAlleles, alleleString); - - if ( allele == null ) { - if ( Allele.wouldBeNoCallAllele(alleleString.getBytes()) ) { - allele = create(alleleString); - } else { - throw new IllegalArgumentException("Allele " + alleleString + " not present in the list of alleles " + possibleAlleles); - } - } - - myAlleles.add(allele); - } - - return myAlleles; - } - public int compareTo(Allele other) { if ( isReference() && other.isNonReference() ) return -1; @@ -468,9 +436,6 @@ public class Allele implements Comparable { } public static boolean oneIsPrefixOfOther(Allele a1, Allele a2) { - if ( a1.isNull() || a2.isNull() ) - return true; - if ( a2.length() >= a1.length() ) return firstIsPrefixOfSecond(a1, a2); else diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index dcdd95d00..f298f1187 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -188,8 +188,6 @@ public class VariantContext implements Feature { // to enable tribble integratio @Deprecated // ID is no longer stored in the attributes map private final static String ID_KEY = "ID"; - private final Byte REFERENCE_BASE_FOR_INDEL; - public final static Set PASSES_FILTERS = Collections.unmodifiableSet(new LinkedHashSet()); /** The location of this VariantContext */ @@ -228,7 +226,6 @@ public class VariantContext implements Feature { // to enable tribble integratio // --------------------------------------------------------------------------------------------------------- public enum Validation { - REF_PADDING, ALLELES, GENOTYPES } @@ -250,7 +247,7 @@ public class VariantContext implements Feature { // to enable tribble integratio this(other.getSource(), other.getID(), other.getChr(), other.getStart(), other.getEnd(), other.getAlleles(), other.getGenotypes(), other.getLog10PError(), other.getFiltersMaybeNull(), - other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL, + other.getAttributes(), other.fullyDecoded, NO_VALIDATION); } @@ -266,7 +263,6 @@ public class VariantContext implements Feature { // to enable tribble integratio * @param log10PError qual * @param filters filters: use null for unfiltered and empty set for passes filters * @param attributes attributes - * @param referenceBaseForIndel padded reference base * @param validationToPerform set of validation steps to take */ protected VariantContext(final String source, @@ -279,7 +275,6 @@ public class VariantContext implements Feature { // to enable tribble integratio final double log10PError, final Set filters, final Map attributes, - final Byte referenceBaseForIndel, final boolean fullyDecoded, final EnumSet validationToPerform ) { if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); } @@ -292,7 +287,6 @@ public class VariantContext implements Feature { // to enable tribble integratio this.ID = ID.equals(VCFConstants.EMPTY_ID_FIELD) ? VCFConstants.EMPTY_ID_FIELD : ID; this.commonInfo = new CommonInfo(source, log10PError, filters, attributes); - REFERENCE_BASE_FOR_INDEL = referenceBaseForIndel; // todo -- remove me when this check is no longer necessary if ( this.commonInfo.hasAttribute(ID_KEY) ) @@ -340,8 +334,9 @@ public class VariantContext implements Feature { // to enable tribble integratio * in this VC is returned as the set of alleles in the subContext, even if * some of those alleles aren't in the samples * - * @param sampleNames - * @return + * @param sampleNames the sample names + * @param rederiveAllelesFromGenotypes if true, returns the alleles to just those in use by the samples + * @return new VariantContext subsetting to just the given samples */ public VariantContext subContextFromSamples(Set sampleNames, final boolean rederiveAllelesFromGenotypes ) { if ( sampleNames.containsAll(getSampleNames()) ) { @@ -501,7 +496,7 @@ public class VariantContext implements Feature { // to enable tribble integratio */ public boolean isSimpleInsertion() { // can't just call !isSimpleDeletion() because of complex indels - return getType() == Type.INDEL && getReference().isNull() && isBiallelic(); + return getType() == Type.INDEL && isBiallelic() && getReference().length() < getAlternateAllele(0).length(); } /** @@ -509,7 +504,7 @@ public class VariantContext implements Feature { // to enable tribble integratio */ public boolean isSimpleDeletion() { // can't just call !isSimpleInsertion() because of complex indels - return getType() == Type.INDEL && getAlternateAllele(0).isNull() && isBiallelic(); + return getType() == Type.INDEL && isBiallelic() && getReference().length() > getAlternateAllele(0).length(); } /** @@ -553,22 +548,6 @@ public class VariantContext implements Feature { // to enable tribble integratio return ID; } - public boolean hasReferenceBaseForIndel() { - return REFERENCE_BASE_FOR_INDEL != null; - } - - // the indel base that gets stripped off for indels - public Byte getReferenceBaseForIndel() { - return REFERENCE_BASE_FOR_INDEL; - } - - public String getAlleleStringWithRefPadding(final Allele allele) { - if ( VCFAlleleClipper.needsPadding(this) ) - return VCFAlleleClipper.padAllele(this, allele).getDisplayString(); - else - return allele.getDisplayString(); - } - // --------------------------------------------------------------------------------------------------------- // @@ -808,8 +787,8 @@ public class VariantContext implements Feature { // to enable tribble integratio * Returns a map from sampleName -> Genotype for the genotype associated with sampleName. Returns a map * for consistency with the multi-get function. * - * @param sampleName - * @return + * @param sampleName the sample name + * @return mapping from sample name to genotype * @throws IllegalArgumentException if sampleName isn't bound to a genotype */ public GenotypesContext getGenotypes(String sampleName) { @@ -823,7 +802,7 @@ public class VariantContext implements Feature { // to enable tribble integratio * For testing convenience only * * @param sampleNames a unique list of sample names - * @return + * @return subsetting genotypes context * @throws IllegalArgumentException if sampleName isn't bound to a genotype */ protected GenotypesContext getGenotypes(Collection sampleNames) { @@ -1011,13 +990,13 @@ public class VariantContext implements Feature { // to enable tribble integratio /** * Run all extra-strict validation tests on a Variant Context object * - * @param reference the true reference allele - * @param paddedRefBase the reference base used for padding indels - * @param rsIDs the true dbSNP IDs + * @param reportedReference the reported reference allele + * @param observedReference the actual reference allele + * @param rsIDs the true dbSNP IDs */ - public void extraStrictValidation(Allele reference, Byte paddedRefBase, Set rsIDs) { + public void extraStrictValidation(final Allele reportedReference, final Allele observedReference, final Set rsIDs) { // validate the reference - validateReferenceBases(reference, paddedRefBase); + validateReferenceBases(reportedReference, observedReference); // validate the RS IDs validateRSIDs(rsIDs); @@ -1032,18 +1011,9 @@ public class VariantContext implements Feature { // to enable tribble integratio //checkReferenceTrack(); } - public void validateReferenceBases(Allele reference, Byte paddedRefBase) { - if ( reference == null ) - return; - - // don't validate if we're a complex event - if ( !isComplexIndel() && !reference.isNull() && !reference.basesMatch(getReference()) ) { - throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), reference.getBaseString(), getReference().getBaseString())); - } - - // we also need to validate the padding base for simple indels - if ( hasReferenceBaseForIndel() && !getReferenceBaseForIndel().equals(paddedRefBase) ) { - throw new TribbleException.InternalCodecException(String.format("the padded REF base is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), (char)paddedRefBase.byteValue(), (char)getReferenceBaseForIndel().byteValue())); + public void validateReferenceBases(final Allele reportedReference, final Allele observedReference) { + if ( reportedReference != null && !reportedReference.basesMatch(observedReference) ) { + throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), observedReference.getBaseString(), reportedReference.getBaseString())); } } @@ -1135,7 +1105,6 @@ public class VariantContext implements Feature { // to enable tribble integratio for (final Validation val : validationToPerform ) { switch (val) { case ALLELES: validateAlleles(); break; - case REF_PADDING: validateReferencePadding(); break; case GENOTYPES: validateGenotypes(); break; default: throw new IllegalArgumentException("Unexpected validation mode " + val); } @@ -1164,20 +1133,11 @@ public class VariantContext implements Feature { // to enable tribble integratio } } - private void validateReferencePadding() { - if ( hasSymbolicAlleles() ) // symbolic alleles don't need padding... - return; - - boolean needsPadding = (getReference().length() == getEnd() - getStart()); // off by one because padded base was removed - - if ( needsPadding && !hasReferenceBaseForIndel() ) - throw new ReviewedStingException("Badly formed variant context at location " + getChr() + ":" + getStart() + "; no padded reference base was provided."); - } - private void validateAlleles() { - // check alleles - boolean alreadySeenRef = false, alreadySeenNull = false; - for ( Allele allele : alleles ) { + + boolean alreadySeenRef = false; + + for ( final Allele allele : alleles ) { // make sure there's only one reference allele if ( allele.isReference() ) { if ( alreadySeenRef ) throw new IllegalArgumentException("BUG: Received two reference tagged alleles in VariantContext " + alleles + " this=" + this); @@ -1187,24 +1147,14 @@ public class VariantContext implements Feature { // to enable tribble integratio if ( allele.isNoCall() ) { throw new IllegalArgumentException("BUG: Cannot add a no call allele to a variant context " + alleles + " this=" + this); } - - // make sure there's only one null allele - if ( allele.isNull() ) { - if ( alreadySeenNull ) throw new IllegalArgumentException("BUG: Received two null alleles in VariantContext " + alleles + " this=" + this); - alreadySeenNull = true; - } } // make sure there's one reference allele if ( ! alreadySeenRef ) throw new IllegalArgumentException("No reference allele found in VariantContext"); -// if ( getType() == Type.INDEL ) { -// if ( getReference().length() != (getLocation().size()-1) ) { - long length = (stop - start) + 1; - if ( ! hasSymbolicAlleles() - && ((getReference().isNull() && length != 1 ) - || (getReference().isNonNull() && (length - getReference().length() > 1)))) { + final long length = (stop - start) + 1; + if ( ! hasSymbolicAlleles() && length != getReference().length() ) { throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java index f2375f6f9..d8ab4bd23 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java @@ -25,9 +25,6 @@ package org.broadinstitute.sting.utils.variantcontext; import com.google.java.contract.*; -import org.broad.tribble.Feature; -import org.broad.tribble.TribbleException; -import org.broad.tribble.util.ParsingUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -74,7 +71,6 @@ public class VariantContextBuilder { private Set filters = null; private Map attributes = null; private boolean attributesCanBeModified = false; - private Byte referenceBaseForIndel = null; /** enum of what must be validated */ final private EnumSet toValidate = EnumSet.noneOf(VariantContext.Validation.class); @@ -117,7 +113,6 @@ public class VariantContextBuilder { this.genotypes = parent.genotypes; this.ID = parent.getID(); this.log10PError = parent.getLog10PError(); - this.referenceBaseForIndel = parent.getReferenceBaseForIndel(); this.source = parent.getSource(); this.start = parent.getStart(); this.stop = parent.getEnd(); @@ -132,7 +127,6 @@ public class VariantContextBuilder { this.genotypes = parent.genotypes; this.ID = parent.ID; this.log10PError = parent.log10PError; - this.referenceBaseForIndel = parent.referenceBaseForIndel; this.source = parent.source; this.start = parent.start; this.stop = parent.stop; @@ -362,21 +356,6 @@ public class VariantContextBuilder { return this; } - /** - * Tells us that the resulting VariantContext should use this byte for the reference base - * Null means no refBase is available - * @param referenceBaseForIndel - */ - public VariantContextBuilder referenceBaseForIndel(final Byte referenceBaseForIndel) { - this.referenceBaseForIndel = referenceBaseForIndel; - toValidate.add(VariantContext.Validation.REF_PADDING); - return this; - } - - public VariantContextBuilder referenceBaseForIndel(final String referenceBaseForIndel) { - return referenceBaseForIndel(referenceBaseForIndel.getBytes()[0]); - } - /** * Tells us that the resulting VariantContext should have source field set to source * @param source @@ -401,7 +380,6 @@ public class VariantContextBuilder { this.start = start; this.stop = stop; toValidate.add(VariantContext.Validation.ALLELES); - toValidate.add(VariantContext.Validation.REF_PADDING); return this; } @@ -416,7 +394,6 @@ public class VariantContextBuilder { this.start = loc.getStart(); this.stop = loc.getStop(); toValidate.add(VariantContext.Validation.ALLELES); - toValidate.add(VariantContext.Validation.REF_PADDING); return this; } @@ -440,7 +417,6 @@ public class VariantContextBuilder { public VariantContextBuilder start(final long start) { this.start = start; toValidate.add(VariantContext.Validation.ALLELES); - toValidate.add(VariantContext.Validation.REF_PADDING); return this; } @@ -517,6 +493,6 @@ public class VariantContextBuilder { public VariantContext make() { return new VariantContext(source, ID, contig, start, stop, alleles, genotypes, log10PError, filters, attributes, - referenceBaseForIndel, fullyDecoded, toValidate); + fullyDecoded, toValidate); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index d7e072980..e1a043e94 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -64,9 +64,9 @@ public class VariantContextUtils { * Ensures that VC contains all of the samples in allSamples by adding missing samples to * the resulting VC with default diploid ./. genotypes * - * @param vc - * @param allSamples - * @return + * @param vc the VariantContext + * @param allSamples all of the samples needed + * @return a new VariantContext with missing samples added */ public static VariantContext addMissingSamples(final VariantContext vc, final Set allSamples) { // TODO -- what's the fastest way to do this calculation? @@ -376,9 +376,9 @@ public class VariantContextUtils { /** * @deprecated use variant context builder version instead - * @param vc - * @param keysToPreserve - * @return + * @param vc the variant context + * @param keysToPreserve the keys to preserve + * @return a pruned version of the original variant context */ @Deprecated public static VariantContext pruneVariantContext(final VariantContext vc, Collection keysToPreserve ) { @@ -486,14 +486,13 @@ public class VariantContextUtils { if ( genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE ) verifyUniqueSampleNames(unsortedVCs); - final List prepaddedVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions); + final List preFilteredVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions); // Make sure all variant contexts are padded with reference base in case of indels if necessary final List VCs = new ArrayList(); - for (final VariantContext vc : prepaddedVCs) { - // also a reasonable place to remove filtered calls, if needed + for (final VariantContext vc : preFilteredVCs) { if ( ! filteredAreUncalled || vc.isNotFiltered() ) - VCs.add(VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc)); + VCs.add(vc); } if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled return null; @@ -547,9 +546,6 @@ public class VariantContextUtils { filters.addAll(vc.getFilters()); - if ( referenceBaseForIndel == null ) - referenceBaseForIndel = vc.getReferenceBaseForIndel(); - // // add attributes // @@ -661,10 +657,9 @@ public class VariantContextUtils { builder.genotypes(genotypes); builder.log10PError(log10PError); builder.filters(filters).attributes(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes); - builder.referenceBaseForIndel(referenceBaseForIndel); // Trim the padded bases of all alleles if necessary - final VariantContext merged = createVariantContextWithTrimmedAlleles(builder.make()); + final VariantContext merged = builder.make(); if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged); return merged; } @@ -700,73 +695,6 @@ public class VariantContextUtils { return true; } - private static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) { - // see if we need to trim common reference base from all alleles - boolean trimVC; - - // We need to trim common reference base from all alleles in all genotypes if a ref base is common to all alleles - Allele refAllele = inputVC.getReference(); - if (!inputVC.isVariant()) - trimVC = false; - else if (refAllele.isNull()) - trimVC = false; - else { - trimVC = VCFAlleleClipper.shouldClipFirstBaseP(inputVC.getAlternateAlleles(), (byte) inputVC.getReference().getDisplayString().charAt(0)); - } - - // nothing to do if we don't need to trim bases - if (trimVC) { - List alleles = new ArrayList(); - GenotypesContext genotypes = GenotypesContext.create(); - - Map originalToTrimmedAlleleMap = new HashMap(); - - for (final Allele a : inputVC.getAlleles()) { - if (a.isSymbolic()) { - alleles.add(a); - originalToTrimmedAlleleMap.put(a, a); - } else { - // get bases for current allele and create a new one with trimmed bases - byte[] newBases = Arrays.copyOfRange(a.getBases(), 1, a.length()); - Allele trimmedAllele = Allele.create(newBases, a.isReference()); - alleles.add(trimmedAllele); - originalToTrimmedAlleleMap.put(a, trimmedAllele); - } - } - - // detect case where we're trimming bases but resulting vc doesn't have any null allele. In that case, we keep original representation - // example: mixed records such as {TA*,TGA,TG} - boolean hasNullAlleles = false; - - for (final Allele a: originalToTrimmedAlleleMap.values()) { - if (a.isNull()) - hasNullAlleles = true; - } - - if (!hasNullAlleles) - return inputVC; - // now we can recreate new genotypes with trimmed alleles - for ( final Genotype genotype : inputVC.getGenotypes() ) { - - List originalAlleles = genotype.getAlleles(); - List trimmedAlleles = new ArrayList(); - for ( final Allele a : originalAlleles ) { - if ( a.isCalled() ) - trimmedAlleles.add(originalToTrimmedAlleleMap.get(a)); - else - trimmedAlleles.add(Allele.NO_CALL); - } - genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make()); - - } - - final VariantContextBuilder builder = new VariantContextBuilder(inputVC); - return builder.alleles(alleles).genotypes(genotypes).referenceBaseForIndel(new Byte(inputVC.getReference().getBases()[0])).make(); - } - - return inputVC; - } - public static GenotypesContext stripPLs(GenotypesContext genotypes) { GenotypesContext newGs = GenotypesContext.create(genotypes.size()); @@ -979,7 +907,7 @@ public class VariantContextUtils { HashMap alleleMap = new HashMap(vc.getAlleles().size()); for ( Allele originalAllele : vc.getAlleles() ) { Allele newAllele; - if ( originalAllele.isNoCall() || originalAllele.isNull() ) + if ( originalAllele.isNoCall() ) newAllele = originalAllele; else newAllele = Allele.create(BaseUtils.simpleReverseComplement(originalAllele.getBases()), originalAllele.isReference()); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java index df2008e8e..b5da206ad 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java @@ -274,10 +274,7 @@ class BCF2Writer extends IndexingVariantContextWriter { } private void buildAlleles( VariantContext vc ) throws IOException { - final boolean needsPadding = VCFAlleleClipper.needsPadding(vc); for ( Allele allele : vc.getAlleles() ) { - if ( needsPadding ) - allele = VCFAlleleClipper.padAllele(vc, allele); final byte[] s = allele.getDisplayBases(); if ( s == null ) throw new ReviewedStingException("BUG: BCF2Writer encountered null padded allele" + allele); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java index 4548e026e..ea968e153 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java @@ -162,7 +162,6 @@ class VCFWriter extends IndexingVariantContextWriter { vc = new VariantContextBuilder(vc).noGenotypes().make(); try { - vc = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc); super.add(vc); Map alleleMap = buildAlleleMap(vc); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/AlleleUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/AlleleUnitTest.java index ed9805d19..3bf020df7 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/AlleleUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/AlleleUnitTest.java @@ -37,8 +37,6 @@ import org.testng.annotations.Test; // public Allele(byte[] bases, boolean isRef) { // public Allele(boolean isRef) { // public Allele(String bases, boolean isRef) { -// public boolean isNullAllele() { return length() == 0; } -// public boolean isNonNullAllele() { return ! isNullAllele(); } // public boolean isReference() { return isRef; } // public boolean isNonReference() { return ! isReference(); } // public byte[] getBases() { return bases; } @@ -72,8 +70,6 @@ public class AlleleUnitTest { Assert.assertFalse(A.isReference()); Assert.assertTrue(A.basesMatch("A")); Assert.assertEquals(A.length(), 1); - Assert.assertTrue(A.isNonNull()); - Assert.assertFalse(A.isNull()); Assert.assertTrue(ARef.isReference()); Assert.assertFalse(ARef.isNonReference()); @@ -92,8 +88,8 @@ public class AlleleUnitTest { Assert.assertFalse(NoCall.isReference()); Assert.assertFalse(NoCall.basesMatch(".")); Assert.assertEquals(NoCall.length(), 0); - Assert.assertTrue(NoCall.isNonNull()); - Assert.assertFalse(NoCall.isNull()); + Assert.assertTrue(NoCall.isNoCall()); + Assert.assertFalse(NoCall.isCalled()); } @@ -111,8 +107,6 @@ public class AlleleUnitTest { Assert.assertFalse(del.basesMatch("-")); Assert.assertTrue(del.basesMatch("")); Assert.assertEquals(del.length(), 0); - Assert.assertFalse(del.isNonNull()); - Assert.assertTrue(del.isNull()); } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java index 1d290118f..11c75ed9a 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java @@ -845,7 +845,6 @@ public class VariantContextUnitTest extends BaseTest { Assert.assertEquals(sub.getLog10PError(), vc.getLog10PError()); Assert.assertEquals(sub.getFilters(), vc.getFilters()); Assert.assertEquals(sub.getID(), vc.getID()); - Assert.assertEquals(sub.getReferenceBaseForIndel(), vc.getReferenceBaseForIndel()); Assert.assertEquals(sub.getAttributes(), vc.getAttributes()); Set expectedGenotypes = new HashSet(); From 2ae890155cad5a3386cd7a53740960ce604487a3 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Thu, 26 Jul 2012 13:43:00 -0400 Subject: [PATCH 008/176] Improvements to indel calling in pool caller: a) Compute per-read likelihoods in reference sample to determine wheter a read is informative or not. b) Fixed bugs in unit tests. c) Fixed padding-related bugs when computing matches/mismatches in ErrorModel, d) Added a couple of more integration tests to increase test coverage, including testing odd ploidy --- .../gatk/walkers/genotyper/ErrorModel.java | 102 +++++++++++++----- ...olGenotypeLikelihoodsCalculationModel.java | 5 +- .../PoolIndelGenotypeLikelihoods.java | 8 +- ...elGenotypeLikelihoodsCalculationModel.java | 6 -- .../genotyper/PoolCallerIntegrationTest.java | 19 +++- .../PoolGenotypeLikelihoodsUnitTest.java | 18 ++-- .../indels/PairHMMIndelErrorModel.java | 4 +- .../ArtificialReadPileupTestProvider.java | 4 +- 8 files changed, 120 insertions(+), 46 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java index f91e535b0..864414de9 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java @@ -1,6 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import com.google.java.contract.Requires; +import org.apache.commons.lang.ArrayUtils; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; +import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -9,6 +13,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; +import java.util.LinkedHashMap; /** * Created by IntelliJ IDEA. @@ -30,24 +35,26 @@ public class ErrorModel { private static final boolean compressRange = false; private static final double log10MinusE = Math.log10(Math.exp(1.0)); - + private static final boolean DEBUG = false; /** * Calculates the probability of the data (reference sample reads) given the phred scaled site quality score. * - * @param minQualityScore Minimum site quality score to evaluate - * @param maxQualityScore Maximum site quality score to evaluate - * @param phredScaledPrior Prior for site quality + * @param UAC Argument Collection * @param refSamplePileup Reference sample pileup * @param refSampleVC VC with True alleles in reference sample pileup - * @param minPower Minimum power */ - public ErrorModel (byte minQualityScore, byte maxQualityScore, byte phredScaledPrior, - ReadBackedPileup refSamplePileup, VariantContext refSampleVC, double minPower) { - this.maxQualityScore = maxQualityScore; - this.minQualityScore = minQualityScore; - this.phredScaledPrior = phredScaledPrior; - log10minPower = Math.log10(minPower); + public ErrorModel (final UnifiedArgumentCollection UAC, + final ReadBackedPileup refSamplePileup, + VariantContext refSampleVC, final ReferenceContext refContext) { + this.maxQualityScore = UAC.maxQualityScore; + this.minQualityScore = UAC.minQualityScore; + this.phredScaledPrior = UAC.phredScaledPrior; + log10minPower = Math.log10(UAC.minPower); + PairHMMIndelErrorModel pairModel = null; + LinkedHashMap haplotypeMap = null; + HashMap> indelLikelihoodMap = null; + double[][] perReadLikelihoods = null; double[] model = new double[maxQualityScore+1]; Arrays.fill(model,Double.NEGATIVE_INFINITY); @@ -61,11 +68,17 @@ public class ErrorModel { break; } } - - + if (refSampleVC.isIndel()) { + pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY, + UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION); + haplotypeMap = new LinkedHashMap(); + indelLikelihoodMap = new HashMap>(); + IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(refSampleVC.getAlleles(), refContext, refContext.getLocus(), haplotypeMap); // will update haplotypeMap adding elements + } } + + double p = MathUtils.phredScaleToLog10Probability((byte)(maxQualityScore-minQualityScore)); if (refSamplePileup == null || refSampleVC == null || !hasCalledAlleles) { - double p = MathUtils.phredScaleToLog10Probability((byte)(maxQualityScore-minQualityScore)); for (byte q=minQualityScore; q<=maxQualityScore; q++) { // maximum uncertainty if there's no ref data at site model[q] = p; @@ -75,23 +88,47 @@ public class ErrorModel { else { hasData = true; int matches = 0; - int coverage = refSamplePileup.getNumberOfElements(); + int coverage = 0; Allele refAllele = refSampleVC.getReference(); + if (refSampleVC.isIndel()) { + final int readCounts[] = new int[refSamplePileup.getNumberOfElements()]; + //perReadLikelihoods = new double[readCounts.length][refSampleVC.getAlleles().size()]; + final int eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(refSampleVC.getAlleles()); + perReadLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(refSamplePileup,haplotypeMap,refContext, eventLength, indelLikelihoodMap, readCounts); + } + int idx = 0; for (PileupElement refPileupElement : refSamplePileup) { + if (DEBUG) + System.out.println(refPileupElement.toString()); boolean isMatch = false; - for (Allele allele : refSampleVC.getAlleles()) - isMatch |= pileupElementMatches(refPileupElement, allele, refAllele); + for (Allele allele : refSampleVC.getAlleles()) { + boolean m = pileupElementMatches(refPileupElement, allele, refAllele, refContext.getBase()); + if (DEBUG) System.out.println(m); + isMatch |= m; + } + if (refSampleVC.isIndel()) { + // ignore match/mismatch if reads, as determined by their likelihood, are not informative + double[] perAlleleLikelihoods = perReadLikelihoods[idx++]; + if (!isInformativeElement(perAlleleLikelihoods)) + matches++; + else + matches += (isMatch?1:0); - matches += (isMatch?1:0); - // System.out.format("MATCH:%b\n",isMatch); + } else { + matches += (isMatch?1:0); + } + coverage++; } int mismatches = coverage - matches; //System.out.format("Cov:%d match:%d mismatch:%d\n",coverage, matches, mismatches); for (byte q=minQualityScore; q<=maxQualityScore; q++) { - model[q] = log10PoissonProbabilitySiteGivenQual(q,coverage, mismatches); + if (coverage==0) + model[q] = p; + else + model[q] = log10PoissonProbabilitySiteGivenQual(q,coverage, mismatches); } this.refDepth = coverage; } @@ -101,6 +138,17 @@ public class ErrorModel { } + @Requires("likelihoods.length>0") + private boolean isInformativeElement(double[] likelihoods) { + // if likelihoods are the same, they're not informative + final double thresh = 0.1; + int maxIdx = MathUtils.maxElementIndex(likelihoods); + int minIdx = MathUtils.minElementIndex(likelihoods); + if (likelihoods[maxIdx]-likelihoods[minIdx]< thresh) + return false; + else + return true; + } /** * Simple constructor that just takes a given log-probability vector as error model. * Only intended for unit testing, not general usage. @@ -115,23 +163,27 @@ public class ErrorModel { } - public static boolean pileupElementMatches(PileupElement pileupElement, Allele allele, Allele refAllele) { - /* System.out.format("PE: base:%s isNextToDel:%b isNextToIns:%b eventBases:%s eventLength:%d Allele:%s RefAllele:%s\n", + public static boolean pileupElementMatches(PileupElement pileupElement, Allele allele, Allele refAllele, byte refBase) { + if (DEBUG) + System.out.format("PE: base:%s isNextToDel:%b isNextToIns:%b eventBases:%s eventLength:%d Allele:%s RefAllele:%s\n", pileupElement.getBase(), pileupElement.isBeforeDeletionStart(), pileupElement.isBeforeInsertion(),pileupElement.getEventBases(),pileupElement.getEventLength(), allele.toString(), refAllele.toString()); - */ + //pileupElement. // if test allele is ref, any base mismatch, or any insertion/deletion at start of pileup count as mismatch if (allele.isReference()) { // for a ref allele, any base mismatch or new indel is a mismatch. - if(allele.getBases().length>0 && allele.getBases().length == refAllele.getBases().length ) // SNP/MNP case - return (/*!pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart() &&*/ pileupElement.getBase() == allele.getBases()[0]); + if(allele.getBases().length>0 ) + // todo - can't check vs. allele because allele is not padded so it doesn't include the reference base at this location + // could clean up/simplify this when unpadding is removed + return (pileupElement.getBase() == refBase); else // either null allele to compare, or ref/alt lengths are different (indel by definition). // if we have an indel that we are comparing against a REF allele, any indel presence (of any length/content) is a mismatch return (!pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart()); } + // for non-ref alleles to compare: if (refAllele.getBases().length == allele.getBases().length) // alleles have the same length (eg snp or mnp) return pileupElement.getBase() == allele.getBases()[0]; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsCalculationModel.java index 37b676601..df4aad0eb 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsCalculationModel.java @@ -247,7 +247,8 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi // find the alternate allele(s) that we should be using final List alleles = getFinalAllelesToUse(tracker, ref, allAllelesToUse, GLs); - + if (alleles == null || alleles.isEmpty()) + return null; // start making the VariantContext final GenomeLoc loc = ref.getLocus(); final int endLoc = getEndLocation(tracker, ref, alleles); @@ -313,7 +314,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi refLanePileup = refPileup.getPileupForLane(laneID); //ReferenceSample referenceSample = new ReferenceSample(UAC.referenceSampleName, refLanePileup, trueReferenceAlleles); - perLaneErrorModels.put(laneID, new ErrorModel(UAC.minQualityScore, UAC.maxQualityScore, UAC.phredScaledPrior, refLanePileup, refVC, UAC.minPower)); + perLaneErrorModels.put(laneID, new ErrorModel(UAC, refLanePileup, refVC, ref)); } return perLaneErrorModels; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoods.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoods.java index dae7bd43d..33b7b8b90 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoods.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoods.java @@ -25,6 +25,8 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods { final int eventLength; double[][] readHaplotypeLikelihoods; + final byte refBase; + public PoolIndelGenotypeLikelihoods(final List alleles, final double[] logLikelihoods, final int ploidy, @@ -38,6 +40,8 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods { this.haplotypeMap = haplotypeMap; this.refContext = referenceContext; this.eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(alleles); + // todo - not needed if indel alleles have base at current position + this.refBase = referenceContext.getBase(); } // ------------------------------------------------------------------------------------- @@ -141,7 +145,7 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods { final int numHaplotypes = haplotypeMap.size(); final int readCounts[] = new int[pileup.getNumberOfElements()]; - readHaplotypeLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, refContext, eventLength, PoolIndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(), readCounts); + readHaplotypeLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, refContext, eventLength, IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(), readCounts); n = readHaplotypeLikelihoods.length; } else { Allele refAllele = null; @@ -161,7 +165,7 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods { int idx =0; for (Allele allele : alleles) { int cnt = numSeenBases.get(idx); - numSeenBases.set(idx++,cnt + (ErrorModel.pileupElementMatches(elt, allele, refAllele)?1:0)); + numSeenBases.set(idx++,cnt + (ErrorModel.pileupElementMatches(elt, allele, refAllele, refBase)?1:0)); } n++; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoodsCalculationModel.java index c2bac4455..a15c9d7da 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoodsCalculationModel.java @@ -70,12 +70,6 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi } - public static HashMap> getIndelLikelihoodMap() { - return IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(); - } - - - protected PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List alleles, final double[] logLikelihoods, final int ploidy, diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolCallerIntegrationTest.java index 903465538..c6b1a8b7f 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolCallerIntegrationTest.java @@ -37,6 +37,13 @@ public class PoolCallerIntegrationTest extends WalkerTest { executeTest("testPoolCaller:"+name+" args=" + args, spec); } + private void PC_LSV_Test_NoRef(String args, String name, String model, String md5) { + final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s -glm %s -ignoreLane -pnrm POOL", + REF, LSV_BAM, LSVINTERVALS, model) + " --no_cmdline_in_header -o %s"; + final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); + executeTest("testPoolCaller:"+name+" args=" + args, spec); + } + @Test public void testBOTH_GGA_Pools() { PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","POOLBOTH","36b8db57f65be1cc3d2d9d7f9f3f26e4"); @@ -44,7 +51,17 @@ public class PoolCallerIntegrationTest extends WalkerTest { @Test public void testINDEL_GGA_Pools() { - PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","POOLINDEL","d1339990291648495bfcf4404f051478"); + PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","POOLINDEL","d1339990291648495bfcf4404f051478"); + } + + @Test + public void testINDEL_maxAlleles2_ploidy3_Pools_noRef() { + PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","POOLINDEL","b66e7150603310fd57ee7bf9fc590706"); + } + + @Test + public void testINDEL_maxAlleles2_ploidy1_Pools_noRef() { + PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","POOLINDEL","ccdae3fc4d2c922f956a186aaad51c29"); } @Test diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java index 35a1bb043..c97c4ed28 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import net.sf.samtools.SAMUtils; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.MathUtils; @@ -49,6 +50,13 @@ public class PoolGenotypeLikelihoodsUnitTest { private static final boolean SIMULATE_NOISY_PILEUP = false; private static final int NUM_SIMULATED_OBS = 10; + void PoolGenotypeLikelihoodsUnitTest() { + UAC.minQualityScore = 5; + UAC.maxQualityScore = 40; + UAC.phredScaledPrior = (byte)20; + UAC.minPower = 0.0; + + } @Test public void testStoringLikelihoodElements() { @@ -251,8 +259,6 @@ public class PoolGenotypeLikelihoodsUnitTest { @Test public void testErrorModel() { final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref"); - final byte minQ = 5; - final byte maxQ = 40; final byte refByte = refPileupTestProvider.getRefByte(); final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T'; final String refSampleName = refPileupTestProvider.getSampleNames().get(0); @@ -270,7 +276,7 @@ public class PoolGenotypeLikelihoodsUnitTest { // get artificial alignment context for ref sample - no noise Map refContext = refPileupTestProvider.getAlignmentContextFromAlleles(0, new String(new byte[]{altByte}), new int[]{matches, mismatches}, false, 30); final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup(); - final ErrorModel emodel = new ErrorModel(minQ,maxQ, (byte)20, refPileup, refVC, 0.0); + final ErrorModel emodel = new ErrorModel(UAC, refPileup, refVC, refPileupTestProvider.getReferenceContext()); final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector(); final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches)); @@ -287,8 +293,6 @@ public class PoolGenotypeLikelihoodsUnitTest { @Test public void testIndelErrorModel() { final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref"); - final byte minQ = 5; - final byte maxQ = 40; final byte refByte = refPileupTestProvider.getRefByte(); final String altBases = "TCA"; final String refSampleName = refPileupTestProvider.getSampleNames().get(0); @@ -313,7 +317,7 @@ public class PoolGenotypeLikelihoodsUnitTest { // Ref sample has TC insertion but pileup will have TCA inserted instead to test mismatches Map refContext = refPileupTestProvider.getAlignmentContextFromAlleles(altBases.length(), altBases, new int[]{matches, mismatches}, false, 30); final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup(); - final ErrorModel emodel = new ErrorModel(minQ,maxQ, (byte)20, refPileup, refInsertionVC, 0.0); + final ErrorModel emodel = new ErrorModel(UAC, refPileup, refInsertionVC, refPileupTestProvider.getReferenceContext()); final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector(); final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches)); @@ -343,7 +347,7 @@ public class PoolGenotypeLikelihoodsUnitTest { // Ref sample has 4bp deletion but pileup will have 3 bp deletion instead to test mismatches Map refContext = refPileupTestProvider.getAlignmentContextFromAlleles(-delLength+1, altBases, new int[]{matches, mismatches}, false, 30); final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup(); - final ErrorModel emodel = new ErrorModel(minQ,maxQ, (byte)20, refPileup, refDeletionVC, 0.0); + final ErrorModel emodel = new ErrorModel(UAC, refPileup, refDeletionVC, refPileupTestProvider.getReferenceContext()); final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector(); final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index f49e78469..6bfe5702d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.indels; +import com.google.java.contract.Ensures; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.MathUtils; @@ -175,7 +176,8 @@ public class PairHMMIndelErrorModel { } - public synchronized double[][] computeGeneralReadHaplotypeLikelihoods(final ReadBackedPileup pileup, + @Ensures("result != null && result.length == pileup.getNumberOfElements()") + public synchronized double[][] computeGeneralReadHaplotypeLikelihoods(final ReadBackedPileup pileup, final LinkedHashMap haplotypeMap, final ReferenceContext ref, final int eventLength, diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java index 256f93473..77769a5fe 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java @@ -90,7 +90,7 @@ public class ArtificialReadPileupTestProvider { return sampleNames; } public byte getRefByte() { - return refBases.substring(offset,offset+1).getBytes()[0]; + return referenceContext.getBase(); } public ReferenceContext getReferenceContext() { return referenceContext;} @@ -107,7 +107,7 @@ public class ArtificialReadPileupTestProvider { ArrayList vcAlleles = new ArrayList(); Allele refAllele, altAllele; if (eventLength == 0) {// SNP case - refAllele =Allele.create(refBases.substring(offset,offset+1),true); + refAllele =Allele.create(referenceContext.getBase(),true); altAllele = Allele.create(altBases.substring(0,1), false); } else if (eventLength>0){ From baf3e3373031f8fb9137ff5cd8e1508eee80a157 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 26 Jul 2012 23:27:11 -0400 Subject: [PATCH 009/176] Allele refactoring checkpoint 2: all code finally compiles, AD and STR annotations are fixed, and most of the UG integration tests pass. --- .../PoolGenotypeLikelihoodsUnitTest.java | 14 +- .../annotator/DepthPerAlleleBySample.java | 4 +- .../genotyper/ConsensusAlleleCounter.java | 2 +- ...elGenotypeLikelihoodsCalculationModel.java | 36 +-- .../broadinstitute/sting/utils/Haplotype.java | 31 +-- .../sting/utils/variantcontext/Allele.java | 15 +- .../variantcontext/VariantContextUtils.java | 7 +- .../ArtificialReadPileupTestProvider.java | 16 +- .../codecs/vcf/VCFAlleleClipperUnitTest.java | 226 ------------------ .../VariantContextTestProvider.java | 12 +- .../VariantContextUnitTest.java | 45 ++-- .../VariantContextUtilsUnitTest.java | 2 +- .../writer/VCFWriterUnitTest.java | 6 +- 13 files changed, 83 insertions(+), 333 deletions(-) delete mode 100644 public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipperUnitTest.java diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java index 35a1bb043..a7dd65bb2 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java @@ -290,16 +290,16 @@ public class PoolGenotypeLikelihoodsUnitTest { final byte minQ = 5; final byte maxQ = 40; final byte refByte = refPileupTestProvider.getRefByte(); - final String altBases = "TCA"; + final String altBases = refByte + "TCA"; final String refSampleName = refPileupTestProvider.getSampleNames().get(0); final List trueAlleles = new ArrayList(); - trueAlleles.add(Allele.create(Allele.NULL_ALLELE_STRING, true)); - trueAlleles.add(Allele.create("TC", false)); + trueAlleles.add(Allele.create(refByte, true)); + trueAlleles.add(Allele.create(refByte + "TC", false)); final String fw = new String(refPileupTestProvider.getReferenceContext().getForwardBases()); final VariantContext refInsertionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(), refPileupTestProvider.getReferenceContext().getLocus().getStart(), trueAlleles). - genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).referenceBaseForIndel(refByte).make(); + genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).make(); final int[] matchArray = {95, 995, 9995, 10000}; @@ -329,12 +329,12 @@ public class PoolGenotypeLikelihoodsUnitTest { // create deletion VC final int delLength = 4; final List delAlleles = new ArrayList(); - delAlleles.add(Allele.create(fw.substring(1,delLength+1), true)); - delAlleles.add(Allele.create(Allele.NULL_ALLELE_STRING, false)); + delAlleles.add(Allele.create(fw.substring(0,delLength+1), true)); + delAlleles.add(Allele.create(refByte, false)); final VariantContext refDeletionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(), refPileupTestProvider.getReferenceContext().getLocus().getStart()+delLength, delAlleles). - genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).referenceBaseForIndel(refByte).make(); + genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).make(); for (int matches: matchArray) { for (int mismatches: mismatchArray) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index 261f6433b..a9edab752 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -88,13 +88,13 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa for ( PileupElement p : pileup ) { if ( p.isBeforeInsertion() ) { - final Allele insertion = Allele.create(refBase + p.getEventBases(), false); + final Allele insertion = Allele.create((char)refBase + p.getEventBases(), false); if ( alleleCounts.containsKey(insertion) ) { alleleCounts.put(insertion, alleleCounts.get(insertion)+1); } } else if ( p.isBeforeDeletionStart() ) { - if ( p.getEventLength() == refAllele.length() + 1 ) { + if ( p.getEventLength() == refAllele.length() - 1 ) { // this is indeed the deletion allele recorded in VC final Allele deletion = Allele.create(refBase); if ( alleleCounts.containsKey(deletion) ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java index d2071a9fb..869e52216 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java @@ -255,7 +255,7 @@ public class ConsensusAlleleCounter { else continue; // don't go on with this allele if refBases are non-standard } else { // insertion case - final String insertionBases = ref.getBase() + s; // add reference padding + final String insertionBases = (char)ref.getBase() + s; // add reference padding if (Allele.acceptableAlleleBases(insertionBases, false)) { // don't allow N's in insertions refAllele = Allele.create(ref.getBase(), true); altAllele = Allele.create(insertionBases, false); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 7eabe7a18..bedffa690 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -35,7 +35,6 @@ import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Haplotype; -import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.variantcontext.*; @@ -48,8 +47,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood private boolean DEBUG = false; private boolean ignoreSNPAllelesWhenGenotypingIndels = false; private PairHMMIndelErrorModel pairModel; - private boolean allelesArePadded; - + private static ThreadLocal>> indelLikelihoodMap = new ThreadLocal>>() { protected synchronized HashMap> initialValue() { @@ -105,22 +103,18 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood indelLikelihoodMap.set(new HashMap>()); haplotypeMap.clear(); - Pair,Boolean> pair = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels); - alleleList = pair.first; - allelesArePadded = pair.second; + alleleList = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels); if (alleleList.isEmpty()) return null; } - getHaplotypeMapFromAlleles(alleleList, ref, loc, haplotypeMap); // will update haplotypeMap adding elements if (haplotypeMap == null || haplotypeMap.isEmpty()) return null; // start making the VariantContext // For all non-snp VC types, VC end location is just startLocation + length of ref allele including padding base. - - final int endLoc = computeEndLocation(alleleList, loc,allelesArePadded); + final int endLoc = loc.getStart() + alleleList.get(0).length() - 1; final int eventLength = getEventLength(alleleList); final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList); @@ -160,15 +154,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood return indelLikelihoodMap.get(); } - public static int computeEndLocation(final List alleles, final GenomeLoc loc, final boolean allelesArePadded) { - Allele refAllele = alleles.get(0); - int endLoc = loc.getStart() + refAllele.length()-1; - if (allelesArePadded) - endLoc++; - - return endLoc; - } - public static void getHaplotypeMapFromAlleles(final List alleleList, final ReferenceContext ref, final GenomeLoc loc, @@ -213,16 +198,15 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood } - public static Pair,Boolean> getInitialAlleleList(final RefMetaDataTracker tracker, + public static List getInitialAlleleList(final RefMetaDataTracker tracker, final ReferenceContext ref, final Map contexts, final AlignmentContextUtils.ReadOrientation contextType, final GenomeLocParser locParser, final UnifiedArgumentCollection UAC, final boolean ignoreSNPAllelesWhenGenotypingIndels) { - + List alleles = new ArrayList(); - boolean allelesArePadded = true; if (UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) { VariantContext vc = null; for (final VariantContext vc_input : tracker.getValues(UAC.alleles, ref.getLocus())) { @@ -235,7 +219,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood } // ignore places where we don't have a variant if (vc == null) - return new Pair,Boolean>(alleles,false); + return alleles; if (ignoreSNPAllelesWhenGenotypingIndels) { // if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore it and don't genotype it @@ -248,15 +232,11 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood } else { alleles.addAll(vc.getAlleles()); } - if ( vc.getReference().getBases().length == vc.getEnd()-vc.getStart()+1) - allelesArePadded = false; - - } else { - alleles = IndelGenotypeLikelihoodsCalculationModel.computeConsensusAlleles(ref, contexts, contextType, locParser, UAC); + alleles = computeConsensusAlleles(ref, contexts, contextType, locParser, UAC); } - return new Pair,Boolean> (alleles,allelesArePadded); + return alleles; } // Overload function in GenotypeLikelihoodsCalculationModel so that, for an indel case, we consider a deletion as part of the pileup, diff --git a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java index 829e75682..143a053c9 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java @@ -204,8 +204,11 @@ public class Haplotype { return new Haplotype(newHaplotype); } - public static LinkedHashMap makeHaplotypeListFromAlleles(List alleleList, int startPos, ReferenceContext ref, - final int haplotypeSize, final int numPrefBases) { + public static LinkedHashMap makeHaplotypeListFromAlleles(final List alleleList, + final int startPos, + final ReferenceContext ref, + final int haplotypeSize, + final int numPrefBases) { LinkedHashMap haplotypeMap = new LinkedHashMap(); @@ -216,7 +219,6 @@ public class Haplotype { refAllele = a; break; } - } if (refAllele == null) @@ -224,19 +226,12 @@ public class Haplotype { byte[] refBases = ref.getBases(); + final int startIdxInReference = 1 + startPos - numPrefBases - ref.getWindow().getStart(); + final String basesBeforeVariant = new String(Arrays.copyOfRange(refBases, startIdxInReference, startIdxInReference + numPrefBases)); - int startIdxInReference = (int)(1+startPos-numPrefBases-ref.getWindow().getStart()); - //int numPrefBases = (int)(vc.getStart()-ref.getWindow().getStart()+1); // indel vc starts one before event - - - byte[] basesBeforeVariant = Arrays.copyOfRange(refBases,startIdxInReference,startIdxInReference+numPrefBases); - int startAfter = startIdxInReference+numPrefBases+ refAllele.getBases().length; // protect against long events that overrun available reference context - if (startAfter > refBases.length) - startAfter = refBases.length; - byte[] basesAfterVariant = Arrays.copyOfRange(refBases, - startAfter, refBases.length); - + final int startAfter = Math.min(startIdxInReference + numPrefBases + refAllele.getBases().length - 1, refBases.length); + final String basesAfterVariant = new String(Arrays.copyOfRange(refBases, startAfter, refBases.length)); // Create location for all haplotypes final int startLoc = ref.getWindow().getStart() + startIdxInReference; @@ -244,16 +239,14 @@ public class Haplotype { final GenomeLoc locus = ref.getGenomeLocParser().createGenomeLoc(ref.getLocus().getContig(),startLoc,stopLoc); - for (final Allele a : alleleList) { - byte[] alleleBases = a.getBases(); + final byte[] alleleBases = a.getBases(); // use string concatenation - String haplotypeString = new String(basesBeforeVariant) + new String(alleleBases) + new String(basesAfterVariant); + String haplotypeString = basesBeforeVariant + new String(Arrays.copyOfRange(alleleBases, 1, alleleBases.length)) + basesAfterVariant; haplotypeString = haplotypeString.substring(0,haplotypeSize); - haplotypeMap.put(a,new Haplotype(haplotypeString.getBytes(), locus)); - + haplotypeMap.put(a,new Haplotype(haplotypeString.getBytes(), locus)); } return haplotypeMap; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java index 1947ef01e..aa63a9dac 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java @@ -90,16 +90,23 @@ public class Allele implements Comparable { // null alleles are no longer allowed if ( wouldBeNullAllele(bases) ) { throw new IllegalArgumentException("Null alleles are not supported"); - } else if ( wouldBeNoCallAllele(bases) ) { - bases = EMPTY_ALLELE_BASES; + } + + // no-calls are represented as no bases + if ( wouldBeNoCallAllele(bases) ) { + this.bases = EMPTY_ALLELE_BASES; isNoCall = true; if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele"); - } else if ( wouldBeSymbolicAllele(bases) ) { + return; + } + + if ( wouldBeSymbolicAllele(bases) ) { isSymbolic = true; if ( isRef ) throw new IllegalArgumentException("Cannot tag a symbolic allele as the reference allele"); } - else + else { bases = BaseUtils.convertToUpperCase(bases); + } this.isRef = isRef; this.bases = bases; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index e1a043e94..e9388205f 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -1163,13 +1163,14 @@ public class VariantContextUtils { if ( ! vc.isIndel() ) // only indels are tandem repeats return null; - final Allele ref = vc.getReference(); + final Allele refAllele = vc.getReference(); + final byte[] refAlleleBases = Arrays.copyOfRange(refAllele.getBases(), 1, refAllele.length()); byte[] repeatUnit = null; final ArrayList lengths = new ArrayList(); for ( final Allele allele : vc.getAlternateAlleles() ) { - Pair result = getNumTandemRepeatUnits(ref.getBases(), allele.getBases(), refBasesStartingAtVCWithoutPad.getBytes()); + Pair result = getNumTandemRepeatUnits(refAlleleBases, Arrays.copyOfRange(allele.getBases(), 1, allele.length()), refBasesStartingAtVCWithoutPad.getBytes()); final int[] repetitionCount = result.first; // repetition count = 0 means allele is not a tandem expansion of context @@ -1184,7 +1185,7 @@ public class VariantContextUtils { repeatUnit = result.second; if (VERBOSE) { System.out.println("RefContext:"+refBasesStartingAtVCWithoutPad); - System.out.println("Ref:"+ref.toString()+" Count:" + String.valueOf(repetitionCount[0])); + System.out.println("Ref:"+refAllele.toString()+" Count:" + String.valueOf(repetitionCount[0])); System.out.println("Allele:"+allele.toString()+" Count:" + String.valueOf(repetitionCount[1])); System.out.println("RU:"+new String(repeatUnit)); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java index 256f93473..aa4885406 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java @@ -103,22 +103,23 @@ public class ArtificialReadPileupTestProvider { boolean addBaseErrors, int phredScaledBaseErrorRate) { // RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext); - ArrayList vcAlleles = new ArrayList(); + String refBase = refBases.substring(offset,offset+1); // referenceContext.getBase()? Allele refAllele, altAllele; - if (eventLength == 0) {// SNP case - refAllele =Allele.create(refBases.substring(offset,offset+1),true); + if (eventLength == 0) { + // SNP case + refAllele = Allele.create(refBase,true); altAllele = Allele.create(altBases.substring(0,1), false); } else if (eventLength>0){ // insertion - refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true); - altAllele = Allele.create(altBases.substring(0,eventLength), false); + refAllele = Allele.create(refBase,true); + altAllele = Allele.create(refBase + altBases.substring(0,eventLength), false); } else { // deletion - refAllele =Allele.create(refBases.substring(offset,offset+Math.abs(eventLength)),true); - altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false); + refAllele = Allele.create(refBases.substring(offset,offset+Math.abs(eventLength)),true); + altAllele = Allele.create(refBase, false); } int stop = loc.getStart(); vcAlleles.add(refAllele); @@ -127,7 +128,6 @@ public class ArtificialReadPileupTestProvider { final VariantContextBuilder builder = new VariantContextBuilder().source(""); builder.loc(loc.getContig(), loc.getStart(), stop); builder.alleles(vcAlleles); - builder.referenceBaseForIndel(referenceContext.getBase()); builder.noGenotypes(); final VariantContext vc = builder.make(); diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipperUnitTest.java deleted file mode 100644 index 8cd051e01..000000000 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipperUnitTest.java +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -package org.broadinstitute.sting.utils.codecs.vcf; - -import com.google.java.contract.Requires; -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.variantcontext.*; -import org.testng.Assert; -import org.testng.SkipException; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.*; - -public class VCFAlleleClipperUnitTest extends BaseTest { - // -------------------------------------------------------------------------------- - // - // Test allele clipping - // - // -------------------------------------------------------------------------------- - - private class ClipAllelesTest extends TestDataProvider { - final int position; - final int stop; - final String ref; - List inputs; - List expected; - - @Requires("arg.length % 2 == 0") - private ClipAllelesTest(final int position, final int stop, final String ... arg) { - super(ClipAllelesTest.class); - this.position = position; - this.stop = stop; - this.ref = arg[0]; - - int n = arg.length / 2; - inputs = new ArrayList(n); - expected = new ArrayList(n); - - for ( int i = 0; i < n; i++ ) { - final boolean ref = i % n == 0; - inputs.add(Allele.create(arg[i], ref)); - } - for ( int i = n; i < arg.length; i++ ) { - final boolean ref = i % n == 0; - expected.add(Allele.create(arg[i], ref)); - } - } - - public boolean isClipped() { - for ( int i = 0; i < inputs.size(); i++ ) { - if ( inputs.get(i).length() != expected.get(i).length() ) - return true; - } - - return false; - } - - public String toString() { - return String.format("ClipAllelesTest input=%s expected=%s", inputs, expected); - } - } - @DataProvider(name = "ClipAllelesTest") - public Object[][] makeClipAllelesTest() { - // do no harm - new ClipAllelesTest(10, 10, "A", "A"); - new ClipAllelesTest(10, 10, "A", "C", "A", "C"); - new ClipAllelesTest(10, 10, "A", "C", "G", "A", "C", "G"); - - // insertions - new ClipAllelesTest(10, 10, "A", "AA", "-", "A"); - new ClipAllelesTest(10, 10, "A", "AAA", "-", "AA"); - new ClipAllelesTest(10, 10, "A", "AG", "-", "G"); - - // deletions - new ClipAllelesTest(10, 11, "AA", "A", "A", "-"); - new ClipAllelesTest(10, 12, "AAA", "A", "AA", "-"); - new ClipAllelesTest(10, 11, "AG", "A", "G", "-"); - new ClipAllelesTest(10, 12, "AGG", "A", "GG", "-"); - - // multi-allelic insertion and deletions - new ClipAllelesTest(10, 11, "AA", "A", "AAA", "A", "-", "AA"); - new ClipAllelesTest(10, 11, "AA", "A", "AAG", "A", "-", "AG"); - new ClipAllelesTest(10, 10, "A", "AA", "AAA", "-", "A", "AA"); - new ClipAllelesTest(10, 10, "A", "AA", "ACA", "-", "A", "CA"); - new ClipAllelesTest(10, 12, "ACG", "ATC", "AGG", "CG", "TC", "GG"); - new ClipAllelesTest(10, 11, "AC", "AT", "AG", "C", "T", "G"); - - // cannot be clipped - new ClipAllelesTest(10, 11, "AC", "CT", "AG", "AC", "CT", "AG"); - new ClipAllelesTest(10, 11, "AC", "CT", "GG", "AC", "CT", "GG"); - - // symbolic - new ClipAllelesTest(10, 100, "A", "", "A", ""); - new ClipAllelesTest(50, 50, "G", "G]22:60]", "G", "G]22:60]"); - new ClipAllelesTest(51, 51, "T", "]22:55]T", "T", "]22:55]T"); - new ClipAllelesTest(52, 52, "C", "C[22:51[", "C", "C[22:51["); - new ClipAllelesTest(60, 60, "A", "A]22:50]", "A", "A]22:50]"); - - // symbolic with alleles that should be clipped - new ClipAllelesTest(10, 100, "A", "", "AA", "-", "", "A"); - new ClipAllelesTest(10, 100, "AA", "", "A", "A", "", "-"); - new ClipAllelesTest(10, 100, "AA", "", "A", "AAA", "A", "", "-", "AA"); - new ClipAllelesTest(10, 100, "AG", "", "A", "AGA", "G", "", "-", "GA"); - new ClipAllelesTest(10, 100, "G", "", "A", "G", "", "A"); - - // clipping from both ends - // - // TODO -- THIS CODE IS BROKEN BECAUSE CLIPPING DOES WORK WITH ALLELES CLIPPED FROM THE END - // -// new ClipAllelesTest(10, 10, "ATA", "ATTA", "-", "T"); -// new ClipAllelesTest(10, 10, "ATAA", "ATTAA", "-", "T"); -// new ClipAllelesTest(10, 10, "ATAAG", "ATTAAG", "-", "T"); -// new ClipAllelesTest(10, 11, "GTA", "ATTA", "G", "AT"); -// new ClipAllelesTest(10, 11, "GTAA", "ATTAA", "G", "AT"); -// new ClipAllelesTest(10, 11, "GTAAG", "ATTAAG", "G", "AT"); - - // complex substitutions - new ClipAllelesTest(10, 10, "A", "GA", "A", "GA"); - - return ClipAllelesTest.getTests(ClipAllelesTest.class); - } - - @Test(dataProvider = "ClipAllelesTest") - public void testClipAllelesTest(ClipAllelesTest cfg) { - final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(cfg.position, cfg.ref, cfg.inputs, cfg.stop); - Assert.assertNull(clipped.getError(), "Unexpected error occurred"); - Assert.assertEquals(clipped.getStop(), cfg.stop, "Clipped alleles stop"); - Assert.assertEquals(clipped.getClippedAlleles(), cfg.expected, "Clipped alleles"); - } - - @Test(dataProvider = "ClipAllelesTest", dependsOnMethods = "testClipAllelesTest") - public void testPaddingAllelesInVC(final ClipAllelesTest cfg) { - final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(cfg.position, cfg.ref, cfg.inputs, cfg.stop); - final VariantContext vc = new VariantContextBuilder("x", "1", cfg.position, cfg.stop, clipped.getClippedAlleles()) - .referenceBaseForIndel(clipped.getRefBaseForIndel()).make(); - - if ( vc.isMixed() && vc.hasSymbolicAlleles() ) - throw new SkipException("GATK cannot handle mixed variant contexts with symbolic and concrete alleles. Remove this check when allele clipping and padding is generalized"); - - Assert.assertEquals(VCFAlleleClipper.needsPadding(vc), cfg.isClipped(), "needPadding method"); - - if ( cfg.isClipped() ) { - // TODO - // TODO note that the GATK currently uses a broken approach to the clipped alleles, so the expected stop is - // TODO actually the original stop, as the original stop is +1 its true size. - // TODO - final int expectedStop = vc.getEnd(); // + (vc.hasSymbolicAlleles() ? 0 : 1); - - final VariantContext padded = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc); - Assert.assertEquals(padded.getStart(), vc.getStart(), "padded VC start"); - Assert.assertEquals(padded.getAlleles(), cfg.inputs, "padded VC alleles == original unclipped alleles"); - Assert.assertEquals(padded.getEnd(), expectedStop, "padded VC end should be clipped VC + 1 (added a base to ref allele)"); - Assert.assertFalse(VCFAlleleClipper.needsPadding(padded), "padded VC shouldn't need padding again"); - } - } - - // -------------------------------------------------------------------------------- - // - // basic allele clipping test - // - // -------------------------------------------------------------------------------- - - private class ReverseClippingPositionTestProvider extends TestDataProvider { - final String ref; - final List alleles = new ArrayList(); - final int expectedClip; - - private ReverseClippingPositionTestProvider(final int expectedClip, final String ref, final String... alleles) { - super(ReverseClippingPositionTestProvider.class); - this.ref = ref; - for ( final String allele : alleles ) - this.alleles.add(Allele.create(allele)); - this.expectedClip = expectedClip; - } - - @Override - public String toString() { - return String.format("ref=%s allele=%s reverse clip %d", ref, alleles, expectedClip); - } - } - - @DataProvider(name = "ReverseClippingPositionTestProvider") - public Object[][] makeReverseClippingPositionTestProvider() { - // pair clipping - new ReverseClippingPositionTestProvider(0, "ATT", "CCG"); - new ReverseClippingPositionTestProvider(1, "ATT", "CCT"); - new ReverseClippingPositionTestProvider(2, "ATT", "CTT"); - new ReverseClippingPositionTestProvider(2, "ATT", "ATT"); // cannot completely clip allele - - // triplets - new ReverseClippingPositionTestProvider(0, "ATT", "CTT", "CGG"); - new ReverseClippingPositionTestProvider(1, "ATT", "CTT", "CGT"); // the T can go - new ReverseClippingPositionTestProvider(2, "ATT", "CTT", "CTT"); // both Ts can go - - return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class); - } - - - @Test(dataProvider = "ReverseClippingPositionTestProvider") - public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) { - int result = VCFAlleleClipper.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false); - Assert.assertEquals(result, cfg.expectedClip); - } -} diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java index 1a0e8e39d..b95e589b7 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java @@ -225,10 +225,10 @@ public class VariantContextTestProvider { add(builder()); add(builder().alleles("A")); add(builder().alleles("A", "C", "T")); - add(builder().alleles("-", "C").referenceBaseForIndel("A")); - add(builder().alleles("-", "CAGT").referenceBaseForIndel("A")); - add(builder().loc("1", 10, 11).alleles("C", "-").referenceBaseForIndel("A")); - add(builder().loc("1", 10, 13).alleles("CGT", "-").referenceBaseForIndel("A")); + add(builder().alleles("A", "AC")); + add(builder().alleles("A", "ACAGT")); + add(builder().loc("1", 10, 11).alleles("AC", "A")); + add(builder().loc("1", 10, 13).alleles("ACGT", "A")); // make sure filters work add(builder().unfiltered()); @@ -302,8 +302,8 @@ public class VariantContextTestProvider { sites.add(builder().alleles("A").make()); sites.add(builder().alleles("A", "C", "T").make()); - sites.add(builder().alleles("-", "C").referenceBaseForIndel("A").make()); - sites.add(builder().alleles("-", "CAGT").referenceBaseForIndel("A").make()); + sites.add(builder().alleles("A", "AC").make()); + sites.add(builder().alleles("A", "ACAGT").make()); for ( VariantContext site : sites ) { addGenotypes(site); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java index 11c75ed9a..46153221d 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java @@ -28,27 +28,22 @@ public class VariantContextUnitTest extends BaseTest { int snpLocStart = 10; int snpLocStop = 10; - // - / ATC [ref] from 20-23 + // - / ATC [ref] from 20-22 String delLoc = "chr1"; int delLocStart = 20; - int delLocStop = 23; + int delLocStop = 22; // - [ref] / ATC from 20-20 String insLoc = "chr1"; int insLocStart = 20; int insLocStop = 20; - // - / A / T / ATC [ref] from 20-23 - String mixedLoc = "chr1"; - int mixedLocStart = 20; - int mixedLocStop = 23; - VariantContextBuilder basicBuilder, snpBuilder, insBuilder; @BeforeSuite public void before() { - del = Allele.create("-"); - delRef = Allele.create("-", true); + del = Allele.create("A"); + delRef = Allele.create("A", true); A = Allele.create("A"); C = Allele.create("C"); @@ -62,9 +57,9 @@ public class VariantContextUnitTest extends BaseTest { @BeforeMethod public void beforeTest() { - basicBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)).referenceBaseForIndel((byte)'A'); - snpBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)).referenceBaseForIndel((byte)'A'); - insBuilder = new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATC)).referenceBaseForIndel((byte)'A'); + basicBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)); + snpBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)); + insBuilder = new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATC)); } @Test @@ -213,7 +208,7 @@ public class VariantContextUnitTest extends BaseTest { @Test public void testCreatingDeletionVariantContext() { List alleles = Arrays.asList(ATCref, del); - VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).referenceBaseForIndel((byte)'A').make(); + VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).make(); Assert.assertEquals(vc.getChr(), delLoc); Assert.assertEquals(vc.getStart(), delLocStart); @@ -240,8 +235,8 @@ public class VariantContextUnitTest extends BaseTest { @Test public void testMatchingAlleles() { List alleles = Arrays.asList(ATCref, del); - VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).referenceBaseForIndel((byte)'A').make(); - VariantContext vc2 = new VariantContextBuilder("test2", delLoc, delLocStart+12, delLocStop+12, alleles).referenceBaseForIndel((byte)'A').make(); + VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).make(); + VariantContext vc2 = new VariantContextBuilder("test2", delLoc, delLocStart+12, delLocStop+12, alleles).make(); Assert.assertTrue(vc.hasSameAllelesAs(vc2)); Assert.assertTrue(vc.hasSameAlternateAllelesAs(vc2)); @@ -470,15 +465,15 @@ public class VariantContextUnitTest extends BaseTest { @Test public void testRepeatAllele() { - Allele nullR = Allele.create(Allele.NULL_ALLELE_STRING, true); - Allele nullA = Allele.create(Allele.NULL_ALLELE_STRING, false); - Allele atc = Allele.create("ATC", false); - Allele atcatc = Allele.create("ATCATC", false); - Allele ccccR = Allele.create("CCCC", true); - Allele cc = Allele.create("CC", false); - Allele cccccc = Allele.create("CCCCCC", false); - Allele gagaR = Allele.create("GAGA", true); - Allele gagagaga = Allele.create("GAGAGAGA", false); + Allele nullR = Allele.create("A", true); + Allele nullA = Allele.create("A", false); + Allele atc = Allele.create("AATC", false); + Allele atcatc = Allele.create("AATCATC", false); + Allele ccccR = Allele.create("ACCCC", true); + Allele cc = Allele.create("ACC", false); + Allele cccccc = Allele.create("ACCCCCC", false); + Allele gagaR = Allele.create("AGAGA", true); + Allele gagagaga = Allele.create("AGAGAGAGA", false); Pair,byte[]> result; byte[] refBytes = "TATCATCATCGGA".getBytes(); @@ -678,7 +673,7 @@ public class VariantContextUnitTest extends BaseTest { @Test(dataProvider = "getAlleles") public void testMergeAlleles(GetAllelesTest cfg) { final List altAlleles = cfg.alleles.subList(1, cfg.alleles.size()); - final VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, cfg.alleles).referenceBaseForIndel((byte)'A').make(); + final VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, cfg.alleles).make(); Assert.assertEquals(vc.getAlleles(), cfg.alleles, "VC alleles not the same as input alleles"); Assert.assertEquals(vc.getNAlleles(), cfg.alleles.size(), "VC getNAlleles not the same as input alleles size"); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index b09a10d07..8c86a54de 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -99,7 +99,7 @@ public class VariantContextUtilsUnitTest extends BaseTest { private VariantContext makeVC(String source, List alleles, Collection genotypes, Set filters) { int start = 10; int stop = start; // alleles.contains(ATC) ? start + 3 : start; - return new VariantContextBuilder(source, "1", start, stop, alleles).genotypes(genotypes).filters(filters).referenceBaseForIndel(Cref.getBases()[0]).make(); + return new VariantContextBuilder(source, "1", start, stop, alleles).genotypes(genotypes).filters(filters).make(); } // -------------------------------------------------------------------------------- diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java index a7fff4559..5876efa12 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java @@ -139,8 +139,8 @@ public class VCFWriterUnitTest extends BaseTest { Map attributes = new HashMap(); GenotypesContext genotypes = GenotypesContext.create(header.getGenotypeSamples().size()); - alleles.add(Allele.create("-",true)); - alleles.add(Allele.create("CC",false)); + alleles.add(Allele.create("A",true)); + alleles.add(Allele.create("ACC",false)); attributes.put("DP","50"); for (String name : header.getGenotypeSamples()) { @@ -148,7 +148,7 @@ public class VCFWriterUnitTest extends BaseTest { genotypes.add(gt); } return new VariantContextBuilder("RANDOM", loc.getContig(), loc.getStart(), loc.getStop(), alleles) - .genotypes(genotypes).attributes(attributes).referenceBaseForIndel((byte)'A').make(); + .genotypes(genotypes).attributes(attributes).make(); } From 9e2209694a8853e6cb2fa53609fbdf869d2f0a85 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 27 Jul 2012 00:47:15 -0400 Subject: [PATCH 010/176] Re-enable reverse trimming of alleles in UG engine when sub-selecting alleles after genotyping. UG integration tests now pass. --- .../genotyper/UnifiedGenotyperEngine.java | 6 +- .../variantcontext/VariantContextUtils.java | 80 +++++++++++++++++++ 2 files changed, 84 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index d4c45e19d..f73ab2471 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -485,8 +485,10 @@ public class UnifiedGenotyperEngine { builder.attributes(attributes); VariantContext vcCall = builder.make(); - // TODO -- if we are subsetting alleles (either because there were too many or because some were not polymorphic) - // TODO -- then we may need to trim the alleles (because the original VariantContext may have had to pad at the end). + // if we are subsetting alleles (either because there were too many or because some were not polymorphic) + // then we may need to trim the alleles (because the original VariantContext may have had to pad at the end). + if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) + vcCall = VariantContextUtils.reverseTrimAlleles(vcCall); if ( annotationEngine != null && !limitedContext ) { // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index e9388205f..70d365ef8 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -1334,4 +1334,84 @@ public class VariantContextUtils { return start + Math.max(ref.length() - 1, 0); } } + + public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) { + + // TODO - this function doesn't work with mixed records or records that started as mixed and then became non-mixed + + // see whether we need to trim common reference base from all alleles + + final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, false); + if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 ) + return inputVC; + + final List alleles = new ArrayList(); + final GenotypesContext genotypes = GenotypesContext.create(); + final Map originalToTrimmedAlleleMap = new HashMap(); + + for (final Allele a : inputVC.getAlleles()) { + if (a.isSymbolic()) { + alleles.add(a); + originalToTrimmedAlleleMap.put(a, a); + } else { + // get bases for current allele and create a new one with trimmed bases + final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent); + final Allele trimmedAllele = Allele.create(newBases, a.isReference()); + alleles.add(trimmedAllele); + originalToTrimmedAlleleMap.put(a, trimmedAllele); + } + } + + // now we can recreate new genotypes with trimmed alleles + for ( final Genotype genotype : inputVC.getGenotypes() ) { + final List originalAlleles = genotype.getAlleles(); + final List trimmedAlleles = new ArrayList(); + for ( final Allele a : originalAlleles ) { + if ( a.isCalled() ) + trimmedAlleles.add(originalToTrimmedAlleleMap.get(a)); + else + trimmedAlleles.add(Allele.NO_CALL); + } + genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make()); + } + + return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() - 1).alleles(alleles).genotypes(genotypes).make(); + } + + public static int computeReverseClipping(final List unclippedAlleles, + final byte[] ref, + final int forwardClipping, + final boolean allowFullClip) { + int clipping = 0; + boolean stillClipping = true; + + while ( stillClipping ) { + for ( final Allele a : unclippedAlleles ) { + if ( a.isSymbolic() ) + continue; + + // we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong + // position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine). + if ( a.length() - clipping == 0 ) + return clipping - (allowFullClip ? 0 : 1); + + if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) { + stillClipping = false; + } + else if ( ref.length == clipping ) { + if ( allowFullClip ) + stillClipping = false; + else + return -1; + } + else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) { + stillClipping = false; + } + } + if ( stillClipping ) + clipping++; + } + + return clipping; + } } From ef335b6213fee62121876ca6ae8dd3b47437a912 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 27 Jul 2012 02:14:25 -0400 Subject: [PATCH 011/176] Several more walkers have been brought up to use the new Allele representation. --- .../fasta/FastaAlternateReference.java | 4 +-- .../gatk/walkers/indels/IndelRealigner.java | 8 +++++- .../variantutils/LeftAlignVariants.java | 27 ++++++++++--------- .../utils/codecs/vcf/AbstractVCFCodec.java | 12 ++++++++- .../utils/variantcontext/VariantContext.java | 7 +++-- ...astaAlternateReferenceIntegrationTest.java | 2 +- 6 files changed, 38 insertions(+), 22 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java index 9c9a75fc4..92549b821 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java @@ -107,11 +107,11 @@ public class FastaAlternateReference extends FastaReference { continue; if ( vc.isSimpleDeletion()) { - deletionBasesRemaining = vc.getReference().length(); + deletionBasesRemaining = vc.getReference().length() - 1; // delete the next n bases, not this one return new Pair(context.getLocation(), refBase); } else if ( vc.isSimpleInsertion()) { - return new Pair(context.getLocation(), refBase.concat(vc.getAlternateAllele(0).toString())); + return new Pair(context.getLocation(), vc.getAlternateAllele(0).toString()); } else if (vc.isSNP()) { return new Pair(context.getLocation(), vc.getAlternateAllele(0).toString()); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 2153525ab..5e0f15e6a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -872,7 +872,13 @@ public class IndelRealigner extends ReadWalker { for ( VariantContext knownIndel : knownIndelsToTry ) { if ( knownIndel == null || !knownIndel.isIndel() || knownIndel.isComplexIndel() ) continue; - byte[] indelStr = knownIndel.isSimpleInsertion() ? knownIndel.getAlternateAllele(0).getBases() : Utils.dupBytes((byte)'-', knownIndel.getReference().length()); + final byte[] indelStr; + if ( knownIndel.isSimpleInsertion() ) { + final byte[] fullAllele = knownIndel.getAlternateAllele(0).getBases(); + indelStr = Arrays.copyOfRange(fullAllele, 1, fullAllele.length); // remove ref padding + } else { + indelStr = Utils.dupBytes((byte)'-', knownIndel.getReference().length() - 1); + } int start = knownIndel.getStart() - leftmostIndex + 1; Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel); if ( c != null ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java index e0b723659..9fe499a03 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java @@ -139,11 +139,11 @@ public class LeftAlignVariants extends RodWalker { final byte[] refSeq = ref.getBases(); // get the indel length - int indelLength; + final int indelLength; if ( vc.isSimpleDeletion() ) - indelLength = vc.getReference().length(); + indelLength = vc.getReference().length() - 1; else - indelLength = vc.getAlternateAllele(0).length(); + indelLength = vc.getAlternateAllele(0).length() - 1; if ( indelLength > 200 ) { writer.add(vc); @@ -151,7 +151,7 @@ public class LeftAlignVariants extends RodWalker { } // create an indel haplotype - int originalIndex = ref.getLocus().getStart() - ref.getWindow().getStart() + 1; + final int originalIndex = ref.getLocus().getStart() - ref.getWindow().getStart() + 1; final byte[] originalIndel = makeHaplotype(vc, refSeq, originalIndex, indelLength); // create a CIGAR string to represent the event @@ -170,11 +170,12 @@ public class LeftAlignVariants extends RodWalker { VariantContext newVC = new VariantContextBuilder(vc).start(vc.getStart()-difference).stop(vc.getEnd()-difference).make(); //System.out.println("Moving record from " + vc.getChr()+":"+vc.getStart() + " to " + vc.getChr()+":"+(vc.getStart()-difference)); - int indelIndex = originalIndex-difference; - byte[] newBases = new byte[indelLength]; - System.arraycopy((vc.isSimpleDeletion() ? refSeq : originalIndel), indelIndex, newBases, 0, indelLength); - Allele newAllele = Allele.create(newBases, vc.isSimpleDeletion()); - newVC = updateAllele(newVC, newAllele, refSeq[indelIndex-1]); + final int indelIndex = originalIndex-difference; + final byte[] newBases = new byte[indelLength + 1]; + newBases[0] = refSeq[indelIndex-1]; + System.arraycopy((vc.isSimpleDeletion() ? refSeq : originalIndel), indelIndex, newBases, 1, indelLength); + final Allele newAllele = Allele.create(newBases, vc.isSimpleDeletion()); + newVC = updateAllele(newVC, newAllele); writer.add(newVC); return 1; @@ -195,7 +196,7 @@ public class LeftAlignVariants extends RodWalker { if ( vc.isSimpleDeletion() ) { indexOfRef += indelLength; } else { - System.arraycopy(vc.getAlternateAllele(0).getBases(), 0, hap, currentPos, indelLength); + System.arraycopy(vc.getAlternateAllele(0).getBases(), 1, hap, currentPos, indelLength); currentPos += indelLength; } @@ -205,14 +206,14 @@ public class LeftAlignVariants extends RodWalker { return hap; } - public static VariantContext updateAllele(VariantContext vc, Allele newAllele, Byte refBaseForIndel) { + public static VariantContext updateAllele(final VariantContext vc, final Allele newAllele) { // create a mapping from original allele to new allele HashMap alleleMap = new HashMap(vc.getAlleles().size()); if ( newAllele.isReference() ) { alleleMap.put(vc.getReference(), newAllele); - alleleMap.put(vc.getAlternateAllele(0), vc.getAlternateAllele(0)); + alleleMap.put(vc.getAlternateAllele(0), Allele.create(newAllele.getBases()[0], false)); } else { - alleleMap.put(vc.getReference(), vc.getReference()); + alleleMap.put(vc.getReference(), Allele.create(newAllele.getBases()[0], true)); alleleMap.put(vc.getAlternateAllele(0), newAllele); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 2b5695e3a..996cef8a4 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -248,7 +248,6 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec builder.id(parts[2]); final String ref = getCachedString(parts[3].toUpperCase()); - builder.stop(pos + ref.length() - 1); final String alts = getCachedString(parts[4].toUpperCase()); builder.log10PError(parseQual(parts[5])); @@ -257,6 +256,17 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec final Map attrs = parseInfo(parts[7]); builder.attributes(attrs); + if ( attrs.containsKey(VCFConstants.END_KEY) ) { + // update stop with the end key if provided + try { + builder.stop(Integer.valueOf(attrs.get(VCFConstants.END_KEY).toString())); + } catch (Exception e) { + generateException("the END value in the INFO field is not valid"); + } + } else { + builder.stop(pos + ref.length() - 1); + } + // get our alleles, filters, and setup an attribute map final List alleles = parseAlleles(ref, alts, lineNo); builder.alleles(alleles); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index f298f1187..72681ae35 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -496,7 +496,7 @@ public class VariantContext implements Feature { // to enable tribble integratio */ public boolean isSimpleInsertion() { // can't just call !isSimpleDeletion() because of complex indels - return getType() == Type.INDEL && isBiallelic() && getReference().length() < getAlternateAllele(0).length(); + return getType() == Type.INDEL && isBiallelic() && getReference().length() == 1; } /** @@ -504,7 +504,7 @@ public class VariantContext implements Feature { // to enable tribble integratio */ public boolean isSimpleDeletion() { // can't just call !isSimpleInsertion() because of complex indels - return getType() == Type.INDEL && isBiallelic() && getReference().length() > getAlternateAllele(0).length(); + return getType() == Type.INDEL && isBiallelic() && getAlternateAllele(0).length() == 1; } /** @@ -1120,8 +1120,7 @@ public class VariantContext implements Feature { // to enable tribble integratio if ( hasAttribute(VCFConstants.END_KEY) ) { final int end = getAttributeAsInt(VCFConstants.END_KEY, -1); assert end != -1; - if ( end != getEnd() && end != getEnd() + 1 ) { - // the end is allowed to 1 bigger because of the padding + if ( end != getEnd() ) { final String message = "Badly formed variant context at location " + getChr() + ":" + getStart() + "; getEnd() was " + getEnd() + " but this VariantContext contains an END key with value " + end; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java index 1c5db4262..4611f3a40 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java @@ -26,7 +26,7 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest { WalkerTestSpec spec2 = new WalkerTestSpec( "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380 -L 1:10,093,447-10,093,847 -L 1:10,271,252-10,271,452 -o %s", 1, - Arrays.asList("0567b32ebdc26604ddf2a390de4579ac")); + Arrays.asList("ef481be9962e21d09847b8a1d4a4ff65")); executeTest("testFastaAlternateReferenceIndels", spec2); WalkerTestSpec spec3 = new WalkerTestSpec( From a0890126a8ee61687486434b5b6e6567bfdfd973 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Fri, 27 Jul 2012 11:01:39 -0400 Subject: [PATCH 012/176] ActiveRegionWalker's isActive function returns a results object now instead of just a double. --- .../haplotypecaller/HaplotypeCaller.java | 13 ++--- .../gatk/iterators/LocusIteratorByState.java | 3 +- .../traversals/TraverseActiveRegions.java | 11 ++-- .../gatk/walkers/ActiveRegionWalker.java | 3 +- .../broadinstitute/sting/utils/MathUtils.java | 10 ++++ .../utils/activeregion/ActivityProfile.java | 52 +++++++++++-------- .../activeregion/ActivityProfileResult.java | 31 +++++++++++ .../activeregion/ActivityProfileUnitTest.java | 11 +++- 8 files changed, 96 insertions(+), 38 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index f5f707690..a7c2ff23c 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; import com.google.java.contract.Ensures; import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.CommandLineGATK; @@ -303,8 +304,8 @@ public class HaplotypeCaller extends ActiveRegionWalker implem public boolean wantsNonPrimaryReads() { return true; } @Override - @Ensures({"result >= 0.0", "result <= 1.0"}) - public double isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) { + @Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"}) + public ActivityProfileResult isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) { if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { for( final VariantContext vc : tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()) ) { @@ -313,15 +314,15 @@ public class HaplotypeCaller extends ActiveRegionWalker implem } } if( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ) { - return 1.0; + return new ActivityProfileResult(1.0); } } if( USE_ALLELES_TRIGGER ) { - return ( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 ); + return new ActivityProfileResult( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 ); } - if( context == null ) { return 0.0; } + if( context == null ) { return new ActivityProfileResult(0.0); } final List noCall = new ArrayList(); // used to noCall all genotypes until the exact model is applied noCall.add(Allele.NO_CALL); @@ -362,7 +363,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem alleles.add( FAKE_REF_ALLELE ); alleles.add( FAKE_ALT_ALLELE ); final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL); - return ( vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() ) ); + return new ActivityProfileResult( vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() ) ); } //--------------------------------------------------------------------------------------------------------------- diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index 6ff9f3bd5..7d035d208 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -279,7 +279,6 @@ public class LocusIteratorByState extends LocusIterator { */ private void lazyLoadNextAlignmentContext() { while (nextAlignmentContext == null && readStates.hasNext()) { - // this call will set hasExtendedEvents to true if it picks up a read with indel right before the current position on the ref: readStates.collectPendingReads(); final GenomeLoc location = getLocation(); @@ -378,7 +377,7 @@ public class LocusIteratorByState extends LocusIterator { CigarOperator op = state.stepForwardOnGenome(); if (op == null) { // we discard the read only when we are past its end AND indel at the end of the read (if any) was - // already processed. Keeping the read state that retunred null upon stepForwardOnGenome() is safe + // already processed. Keeping the read state that returned null upon stepForwardOnGenome() is safe // as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag. it.remove(); // we've stepped off the end of the object } diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 1b9c12fb0..845c4eacf 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.activeregion.ActivityProfile; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -69,8 +70,7 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine extends TraversalEngine walker, + private final ActivityProfileResult walkerActiveProb(final ActiveRegionWalker walker, final RefMetaDataTracker tracker, final ReferenceContext refContext, final AlignmentContext locus, final GenomeLoc location) { if ( walker.hasPresetActiveRegions() ) { - return walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0; + return new ActivityProfileResult(walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0); } else { return walker.isActive( tracker, refContext, locus ); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java index e38e166ea..b2975cbbf 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java @@ -12,6 +12,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.interval.IntervalSetRule; import org.broadinstitute.sting.utils.interval.IntervalUtils; @@ -72,7 +73,7 @@ public abstract class ActiveRegionWalker extends Walker isActiveList; + final List isActiveList; private GenomeLoc lastLoc = null; private static final int FILTER_SIZE = 65; - private static final Double[] GaussianKernel; + private static final double[] GaussianKernel; static { - GaussianKernel = new Double[2*FILTER_SIZE + 1]; + GaussianKernel = new double[2*FILTER_SIZE + 1]; for( int iii = 0; iii < 2*FILTER_SIZE + 1; iii++ ) { GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 40.0, iii); } @@ -63,22 +62,22 @@ public class ActivityProfile { // todo -- add unit tests // TODO -- own preset regions public ActivityProfile(final GenomeLocParser parser, final boolean presetRegions) { - this(parser, presetRegions, new ArrayList(), null); + this(parser, presetRegions, new ArrayList(), null); } - protected ActivityProfile(final GenomeLocParser parser, final boolean presetRegions, final List isActiveList, final GenomeLoc regionStartLoc) { + protected ActivityProfile(final GenomeLocParser parser, final boolean presetRegions, final List isActiveList, final GenomeLoc regionStartLoc) { this.parser = parser; this.presetRegions = presetRegions; this.isActiveList = isActiveList; this.regionStartLoc = regionStartLoc; } - public void add(final GenomeLoc loc, final double score) { + public void add(final GenomeLoc loc, final ActivityProfileResult result) { if ( loc.size() != 1 ) throw new ReviewedStingException("Bad add call to ActivityProfile: loc " + loc + " size != 1" ); if ( lastLoc != null && loc.getStart() != lastLoc.getStop() + 1 ) throw new ReviewedStingException("Bad add call to ActivityProfile: lastLoc added " + lastLoc + " and next is " + loc); - isActiveList.add(score); + isActiveList.add(result); if( regionStartLoc == null ) { regionStartLoc = loc; } @@ -93,22 +92,33 @@ public class ActivityProfile { * @return a new ActivityProfile that's the band-pass filtered version of this profile */ public ActivityProfile bandPassFilter() { - final Double[] activeProbArray = isActiveList.toArray(new Double[isActiveList.size()]); - final Double[] filteredProbArray = new Double[activeProbArray.length]; + final double[] activeProbArray = new double[isActiveList.size()]; + int iii = 0; + for( final ActivityProfileResult result : isActiveList ) { + activeProbArray[iii++] = result.isActiveProb; + } + final double[] filteredProbArray = new double[activeProbArray.length]; if( !presetRegions ) { - for( int iii = 0; iii < activeProbArray.length; iii++ ) { - final Double[] kernel = (Double[]) ArrayUtils.subarray(GaussianKernel, Math.max(FILTER_SIZE-iii, 0), Math.min(GaussianKernel.length,FILTER_SIZE + activeProbArray.length - iii)); - final Double[] activeProbSubArray = (Double[]) ArrayUtils.subarray(activeProbArray, Math.max(0,iii - FILTER_SIZE), Math.min(activeProbArray.length,iii + FILTER_SIZE + 1)); + for( iii = 0; iii < activeProbArray.length; iii++ ) { + final double[] kernel = ArrayUtils.subarray(GaussianKernel, Math.max(FILTER_SIZE-iii, 0), Math.min(GaussianKernel.length,FILTER_SIZE + activeProbArray.length - iii)); + final double[] activeProbSubArray = ArrayUtils.subarray(activeProbArray, Math.max(0,iii - FILTER_SIZE), Math.min(activeProbArray.length,iii + FILTER_SIZE + 1)); filteredProbArray[iii] = MathUtils.dotProduct(activeProbSubArray, kernel); } } - return new ActivityProfile(parser, presetRegions, Arrays.asList(filteredProbArray), regionStartLoc); + iii = 0; + for( final double prob : filteredProbArray ) { + final ActivityProfileResult result = isActiveList.get(iii++); + result.isActiveProb = prob; + result.resultState = ActivityProfileResult.ActivityProfileResultState.NONE; + result.resultValue = null; + } + return new ActivityProfile(parser, presetRegions, isActiveList, regionStartLoc); } /** * Partition this profile into active regions - * @param activeRegionExtension - * @return + * @param activeRegionExtension the amount of margin overlap in the active region + * @return the list of active regions */ public List createActiveRegions( final int activeRegionExtension, final int maxRegionSize ) { final double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author @@ -119,14 +129,14 @@ public class ActivityProfile { return Collections.emptyList(); } else if( isActiveList.size() == 1 ) { // there's a single element, it's either active or inactive - boolean isActive = isActiveList.get(0) > ACTIVE_PROB_THRESHOLD; + boolean isActive = isActiveList.get(0).isActiveProb > ACTIVE_PROB_THRESHOLD; returnList.addAll(createActiveRegion(isActive, 0, 0, activeRegionExtension, maxRegionSize)); } else { // there are 2+ elements, divide these up into regions - boolean isActive = isActiveList.get(0) > ACTIVE_PROB_THRESHOLD; + boolean isActive = isActiveList.get(0).isActiveProb > ACTIVE_PROB_THRESHOLD; int curStart = 0; for(int iii = 1; iii < isActiveList.size(); iii++ ) { - final boolean thisStatus = isActiveList.get(iii) > ACTIVE_PROB_THRESHOLD; + final boolean thisStatus = isActiveList.get(iii).isActiveProb > ACTIVE_PROB_THRESHOLD; if( isActive != thisStatus ) { returnList.addAll(createActiveRegion(isActive, curStart, iii - 1, activeRegionExtension, maxRegionSize)); isActive = thisStatus; @@ -143,7 +153,7 @@ public class ActivityProfile { * @param isActive should the region be active? * @param curStart offset (0-based) from the start of this region * @param curEnd offset (0-based) from the start of this region - * @param activeRegionExtension + * @param activeRegionExtension the amount of margin overlap in the active region * @return a fully initialized ActiveRegion with the above properties */ private final List createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize) { @@ -161,7 +171,7 @@ public class ActivityProfile { final int size = curEnd - curStart + 1; for( int iii = curStart + (int)(size*0.25); iii < curEnd - (int)(size*0.25); iii++ ) { - if( isActiveList.get(iii) < minProb ) { minProb = isActiveList.get(iii); cutPoint = iii; } + if( isActiveList.get(iii).isActiveProb < minProb ) { minProb = isActiveList.get(iii).isActiveProb; cutPoint = iii; } } final List leftList = createActiveRegion(isActive, curStart, cutPoint, activeRegionExtension, maxRegionSize, new ArrayList()); final List rightList = createActiveRegion(isActive, cutPoint+1, curEnd, activeRegionExtension, maxRegionSize, new ArrayList()); diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java new file mode 100644 index 000000000..8dc29aa3c --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java @@ -0,0 +1,31 @@ +package org.broadinstitute.sting.utils.activeregion; + +/** + * Created with IntelliJ IDEA. + * User: rpoplin + * Date: 7/27/12 + */ + +public class ActivityProfileResult { + public double isActiveProb; + public ActivityProfileResultState resultState; + public Number resultValue; + + public enum ActivityProfileResultState { + NONE, + HIGH_QUALITY_SOFT_CLIPS + } + + public ActivityProfileResult( final double isActiveProb ) { + this.isActiveProb = isActiveProb; + this.resultState = ActivityProfileResultState.NONE; + this.resultValue = null; + } + + public ActivityProfileResult( final double isActiveProb, final ActivityProfileResultState resultState, final Number resultValue ) { + this.isActiveProb = isActiveProb; + this.resultState = resultState; + this.resultValue = resultValue; + } + +} diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java index 282f19d8a..f7c564c74 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java @@ -123,12 +123,12 @@ public class ActivityProfileUnitTest extends BaseTest { for ( int i = 0; i < cfg.probs.size(); i++ ) { double p = cfg.probs.get(i); GenomeLoc loc = genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart() + i, cfg.regionStart.getStart() + i); - profile.add(loc, p); + profile.add(loc, new ActivityProfileResult(p)); } Assert.assertEquals(profile.regionStartLoc, genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart(), cfg.regionStart.getStart() )); Assert.assertEquals(profile.size(), cfg.probs.size()); - Assert.assertEquals(profile.isActiveList, cfg.probs); + assertProbsAreEqual(profile.isActiveList, cfg.probs); assertRegionsAreEqual(profile.createActiveRegions(0, 100), cfg.expectedRegions); } @@ -140,5 +140,12 @@ public class ActivityProfileUnitTest extends BaseTest { } } + private void assertProbsAreEqual(List actual, List expected) { + Assert.assertEquals(actual.size(), expected.size()); + for ( int i = 0; i < actual.size(); i++ ) { + Assert.assertEquals(actual.get(i).isActiveProb, expected.get(i)); + } + } + // todo -- test extensions } \ No newline at end of file From 22bb4804f0445ce2747ccc9b362db2a4272dc86c Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Fri, 27 Jul 2012 12:44:02 -0400 Subject: [PATCH 013/176] HaplotypeCaller now use an excessive number of high quality soft clips as a triggering signal in order to capture both end points of a large deletion in a single active region. --- .../haplotypecaller/GenotypingEngine.java | 2 +- .../haplotypecaller/HaplotypeCaller.java | 11 ++++-- .../LikelihoodCalculationEngine.java | 4 +-- .../utils/activeregion/ActivityProfile.java | 16 +++++++-- .../sting/utils/sam/AlignmentUtils.java | 35 +++++++++++++++++++ 5 files changed, 60 insertions(+), 8 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index e2445e926..6ea735ec0 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -317,7 +317,7 @@ public class GenotypingEngine { } protected void mergeConsecutiveEventsBasedOnLD( final ArrayList haplotypes, final TreeSet startPosKeySet, final byte[] ref, final GenomeLoc refLoc ) { - final int MAX_SIZE_TO_COMBINE = 10; + final int MAX_SIZE_TO_COMBINE = 15; final double MERGE_EVENTS_R2_THRESHOLD = 0.95; if( startPosKeySet.size() <= 1 ) { return; } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index a7c2ff23c..14ea17483 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -57,6 +57,7 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.fragments.FragmentCollection; import org.broadinstitute.sting.utils.fragments.FragmentUtils; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.variantcontext.*; @@ -104,7 +105,7 @@ import java.util.*; @DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} ) @PartitionBy(PartitionType.LOCUS) -@ActiveRegionExtension(extension=65, maxRegion=275) +@ActiveRegionExtension(extension=65, maxRegion=300) public class HaplotypeCaller extends ActiveRegionWalker implements AnnotatorCompatible { /** @@ -329,6 +330,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem final Map splitContexts = AlignmentContextUtils.splitContextBySampleName(context); final GenotypesContext genotypes = GenotypesContext.create(splitContexts.keySet().size()); + final MathUtils.RunningAverage averageHQSoftClips = new MathUtils.RunningAverage(); for( final String sample : splitContexts.keySet() ) { final double[] genotypeLikelihoods = new double[3]; // ref versus non-ref (any event) Arrays.fill(genotypeLikelihoods, 0.0); @@ -349,6 +351,9 @@ public class HaplotypeCaller extends ActiveRegionWalker implem if( p.getBase() != ref.getBase() || p.isDeletion() || p.isBeforeDeletedBase() || p.isAfterDeletedBase() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip() ) { AA = 2; BB = 0; + if( p.isNextToSoftClip() ) { + averageHQSoftClips.add(AlignmentUtils.calcNumHighQualitySoftClips(p.getRead(), (byte) 28)); + } } } genotypeLikelihoods[AA] += QualityUtils.qualToProbLog10(qual); @@ -363,7 +368,9 @@ public class HaplotypeCaller extends ActiveRegionWalker implem alleles.add( FAKE_REF_ALLELE ); alleles.add( FAKE_ALT_ALLELE ); final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL); - return new ActivityProfileResult( vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() ) ); + final double isActiveProb = vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() ); + + return new ActivityProfileResult( isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileResult.ActivityProfileResultState.NONE, averageHQSoftClips.mean() ); } //--------------------------------------------------------------------------------------------------------------- diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index 535508d09..365459882 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -103,7 +103,7 @@ public class LikelihoodCalculationEngine { readQuals[kkk] = ( readQuals[kkk] > (byte) read.getMappingQuality() ? (byte) read.getMappingQuality() : readQuals[kkk] ); // cap base quality by mapping quality //readQuals[kkk] = ( readQuals[kkk] > readInsQuals[kkk] ? readInsQuals[kkk] : readQuals[kkk] ); // cap base quality by base insertion quality, needs to be evaluated //readQuals[kkk] = ( readQuals[kkk] > readDelQuals[kkk] ? readDelQuals[kkk] : readQuals[kkk] ); // cap base quality by base deletion quality, needs to be evaluated - readQuals[kkk] = ( readQuals[kkk] < (byte) 17 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] ); + readQuals[kkk] = ( readQuals[kkk] < (byte) 18 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] ); } for( int jjj = 0; jjj < numHaplotypes; jjj++ ) { @@ -311,7 +311,7 @@ public class LikelihoodCalculationEngine { int hap1 = 0; int hap2 = 0; //double bestElement = Double.NEGATIVE_INFINITY; - final int maxChosenHaplotypes = Math.min( 8, sampleKeySet.size() * 2 + 1 ); + final int maxChosenHaplotypes = Math.min( 9, sampleKeySet.size() * 2 + 1 ); while( bestHaplotypesIndexList.size() < maxChosenHaplotypes ) { double maxElement = Double.NEGATIVE_INFINITY; for( int iii = 0; iii < numHaplotypes; iii++ ) { diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java index 1c91551db..7e736b7bf 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java @@ -47,13 +47,13 @@ public class ActivityProfile { GenomeLoc regionStartLoc = null; final List isActiveList; private GenomeLoc lastLoc = null; - private static final int FILTER_SIZE = 65; + private static final int FILTER_SIZE = 80; private static final double[] GaussianKernel; static { GaussianKernel = new double[2*FILTER_SIZE + 1]; for( int iii = 0; iii < 2*FILTER_SIZE + 1; iii++ ) { - GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 40.0, iii); + GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 55.0, iii); } } @@ -97,6 +97,16 @@ public class ActivityProfile { for( final ActivityProfileResult result : isActiveList ) { activeProbArray[iii++] = result.isActiveProb; } + iii = 0; + for( final ActivityProfileResult result : isActiveList ) { + if( result.resultState.equals(ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS) ) { // special code to deal with the problem that high quality soft clipped bases aren't added to pileups + final int numHQClips = result.resultValue.intValue(); + for( int jjj = Math.max(0, iii - numHQClips); jjj < Math.min(activeProbArray.length, iii+numHQClips); jjj++ ) { + activeProbArray[jjj] = Math.max(activeProbArray[jjj], activeProbArray[iii]); + } + } + iii++; + } final double[] filteredProbArray = new double[activeProbArray.length]; if( !presetRegions ) { for( iii = 0; iii < activeProbArray.length; iii++ ) { @@ -170,7 +180,7 @@ public class ActivityProfile { int cutPoint = -1; final int size = curEnd - curStart + 1; - for( int iii = curStart + (int)(size*0.25); iii < curEnd - (int)(size*0.25); iii++ ) { + for( int iii = curStart + (int)(size*0.15); iii < curEnd - (int)(size*0.15); iii++ ) { if( isActiveList.get(iii).isActiveProb < minProb ) { minProb = isActiveList.get(iii).isActiveProb; cutPoint = iii; } } final List leftList = createActiveRegion(isActive, curStart, cutPoint, activeRegionExtension, maxRegionSize, new ArrayList()); diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java index e5e747c2d..895de3578 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java @@ -30,6 +30,7 @@ import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.walkers.bqsr.EventType; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -405,6 +406,40 @@ public class AlignmentUtils { return alignment; } + public static int calcNumHighQualitySoftClips( final GATKSAMRecord read, final byte qualThreshold ) { + + int numHQSoftClips = 0; + int alignPos = 0; + final Cigar cigar = read.getCigar(); + final byte[] qual = read.getBaseQualities( EventType.BASE_SUBSTITUTION ); + + for( int iii = 0; iii < cigar.numCigarElements(); iii++ ) { + + final CigarElement ce = cigar.getCigarElement(iii); + final int elementLength = ce.getLength(); + + switch( ce.getOperator() ) { + case S: + for( int jjj = 0; jjj < elementLength; jjj++ ) { + if( qual[alignPos++] > qualThreshold ) { numHQSoftClips++; } + } + break; + case M: + case I: + alignPos += elementLength; + break; + case H: + case P: + case D: + case N: + break; + default: + throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator()); + } + } + return numHQSoftClips; + } + public static int calcAlignmentByteArrayOffset(final Cigar cigar, final PileupElement pileupElement, final int alignmentStart, final int refLocus) { return calcAlignmentByteArrayOffset( cigar, pileupElement.getOffset(), pileupElement.isInsertionAtBeginningOfRead(), pileupElement.isDeletion(), alignmentStart, refLocus ); } From 27e7e11ec0e88f1aa1cde5968bf66bc3284f5402 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 27 Jul 2012 15:48:40 -0400 Subject: [PATCH 014/176] Allele refactoring checkpoint #3: all integration tests except for PoolCaller are passing now. Fixed a couple of bugs from old code that popped up during md5 difference review. Added VariantContextUtils.requiresPaddingBase() method for tools that create alleles to use for determining whether or not to add the ref padding base. One of the HaplotypeCaller tests wasn't passing because of RankSumTest differences, so I added a TODO for Ryan to look into this. --- .../haplotypecaller/GenotypingEngine.java | 9 ++++- .../HaplotypeCallerIntegrationTest.java | 5 ++- .../gatk/refdata/VariantContextAdaptors.java | 37 +++++++++++++------ .../validation/ValidationAmplicons.java | 22 +++++++++-- .../walkers/variantutils/VariantsToTable.java | 2 +- .../broadinstitute/sting/utils/Haplotype.java | 9 +++-- .../utils/variantcontext/VariantContext.java | 10 ++--- .../variantcontext/VariantContextUtils.java | 35 ++++++++++++++++-- .../ValidationAmpliconsIntegrationTest.java | 6 +-- .../CombineVariantsIntegrationTest.java | 8 ++-- .../utils/codecs/vcf/VCFIntegrationTest.java | 4 +- 11 files changed, 106 insertions(+), 41 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index ad468f657..678a65024 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -183,8 +183,13 @@ public class GenotypingEngine { } @Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"}) - public List>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine, final ArrayList haplotypes, final byte[] ref, final GenomeLoc refLoc, - final GenomeLoc activeRegionWindow, final GenomeLocParser genomeLocParser, final ArrayList activeAllelesToGenotype ) { + public List>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine, + final ArrayList haplotypes, + final byte[] ref, + final GenomeLoc refLoc, + final GenomeLoc activeRegionWindow, + final GenomeLocParser genomeLocParser, + final ArrayList activeAllelesToGenotype ) { final ArrayList>>> returnCalls = new ArrayList>>>(); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index a87703423..9b8d1b3d7 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -30,7 +30,10 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { - HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "ff370c42c8b09a29f1aeff5ac57c7ea6"); + // TODO -- Ryan, do you know why the md5s changed just for the rank sum tests? + final String RyansMd5 = "ff370c42c8b09a29f1aeff5ac57c7ea6"; + final String EricsMd5 = "d8317f4589e8e0c48bcd087cdb75ce88"; + HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", EricsMd5); } private void HCTestComplexVariants(String bam, String args, String md5) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index dd1eea8a4..1b75a2c70 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -170,31 +170,33 @@ public class VariantContextAdaptors { final byte refBaseForIndel = ref.getBases()[index]; - Allele refAllele; - if ( dbsnp.getNCBIRefBase().equals("-") ) - refAllele = Allele.create(refBaseForIndel); - else if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) ) - return null; - else - refAllele = Allele.create(refBaseForIndel + dbsnp.getNCBIRefBase(), true); - boolean addPaddingBase; if ( isSNP(dbsnp) || isMNP(dbsnp) ) addPaddingBase = false; else if ( isIndel(dbsnp) || dbsnp.getVariantType().contains("mixed") ) - addPaddingBase = true; + addPaddingBase = VariantContextUtils.requiresPaddingBase(stripNullDashes(getAlleleList(dbsnp))); else return null; // can't handle anything else + Allele refAllele; + if ( dbsnp.getNCBIRefBase().equals("-") ) + refAllele = Allele.create(refBaseForIndel, true); + else if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) ) + return null; + else + refAllele = Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + dbsnp.getNCBIRefBase(), true); + final List alleles = new ArrayList(); alleles.add(refAllele); // add all of the alt alleles for ( String alt : getAlternateAlleleList(dbsnp) ) { - if ( ! Allele.acceptableAlleleBases(alt) ) { + if ( Allele.wouldBeNullAllele(alt.getBytes())) + alt = ""; + else if ( ! Allele.acceptableAlleleBases(alt) ) return null; - } - alleles.add(Allele.create((addPaddingBase ? refBaseForIndel : "") + alt, false)); + + alleles.add(Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + alt, false)); } final VariantContextBuilder builder = new VariantContextBuilder(); @@ -203,6 +205,17 @@ public class VariantContextAdaptors { builder.alleles(alleles); return builder.make(); } + + private static List stripNullDashes(final List alleles) { + final List newAlleles = new ArrayList(alleles.size()); + for ( final String allele : alleles ) { + if ( allele.equals("-") ) + newAlleles.add(""); + else + newAlleles.add(allele); + } + return newAlleles; + } } // -------------------------------------------------------------------------------------------------------------- diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java index 9676704c2..9d96dedef 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java @@ -25,6 +25,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; import java.io.PrintStream; import java.util.ArrayList; +import java.util.Arrays; import java.util.LinkedList; import java.util.List; @@ -262,20 +263,33 @@ public class ValidationAmplicons extends RodWalker { sequenceInvalid = true; invReason.add("SITE_IS_FILTERED"); } + + String refString = validate.getReference().getDisplayString(); + String altString = validate.getAlternateAllele(0).getDisplayString(); + if ( validate.isIndel() ) { sequence.append(Character.toUpperCase((char)ref.getBase())); rawSequence.append(Character.toUpperCase((char)ref.getBase())); + final byte[] refAllele = validate.getReference().getBases(); + refString = new String(Arrays.copyOfRange(refAllele, 1, refAllele.length)); + if ( refString.isEmpty() ) + refString = "-"; + final byte[] altAllele = validate.getAlternateAllele(0).getBases(); + altString = new String(Arrays.copyOfRange(altAllele, 1, altAllele.length)); + if ( altString.isEmpty() ) + altString = "-"; } + sequence.append('['); - sequence.append(validate.getAlternateAllele(0).toString()); + sequence.append(altString); sequence.append('/'); - sequence.append(validate.getReference().toString()); + sequence.append(refString); sequence.append(']'); // do this to the raw sequence to -- the indeces will line up that way rawSequence.append('['); - rawSequence.append(validate.getAlternateAllele(0).getBaseString()); + rawSequence.append(altString); rawSequence.append('/'); - rawSequence.append(validate.getReference().getBaseString()); + rawSequence.append(refString); rawSequence.append(']'); allelePos = ref.getLocus(); if ( indelCounter > 0 ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index adf30146f..b73a498bc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -381,7 +381,7 @@ public class VariantsToTable extends RodWalker { getters.put("REF", new Getter() { public String get(VariantContext vc) { StringBuilder x = new StringBuilder(); - x.append(vc.getReference()); + x.append(vc.getReference().getDisplayString()); return x.toString(); } }); diff --git a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java index 143a053c9..54442622f 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java @@ -176,7 +176,7 @@ public class Haplotype { newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii]; } } else if( refAllele.length() < altAllele.length() ) { // insertion - final int altAlleleLength = altAllele.length(); + final int altAlleleLength = altAllele.length() - 1; newHaplotype = new byte[bases.length + altAlleleLength]; for( int iii = 0; iii < bases.length; iii++ ) { newHaplotype[iii] = bases[iii]; @@ -185,15 +185,16 @@ public class Haplotype { newHaplotype[iii] = newHaplotype[iii-altAlleleLength]; } for( int iii = 0; iii < altAlleleLength; iii++ ) { - newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii]; + newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii+1]; } } else { // deletion final int shift = refAllele.length() - altAllele.length(); + final int altAlleleLength = altAllele.length() - 1; newHaplotype = new byte[bases.length - shift]; - for( int iii = 0; iii < haplotypeInsertLocation + altAllele.length(); iii++ ) { + for( int iii = 0; iii < haplotypeInsertLocation + altAlleleLength; iii++ ) { newHaplotype[iii] = bases[iii]; } - for( int iii = haplotypeInsertLocation + altAllele.length(); iii < newHaplotype.length; iii++ ) { + for( int iii = haplotypeInsertLocation + altAlleleLength; iii < newHaplotype.length; iii++ ) { newHaplotype[iii] = bases[iii+shift]; } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 72681ae35..979400350 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -1129,6 +1129,11 @@ public class VariantContext implements Feature { // to enable tribble integratio else throw new ReviewedStingException(message); } + } else { + final long length = (stop - start) + 1; + if ( ! hasSymbolicAlleles() && length != getReference().length() ) { + throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this); + } } } @@ -1151,11 +1156,6 @@ public class VariantContext implements Feature { // to enable tribble integratio // make sure there's one reference allele if ( ! alreadySeenRef ) throw new IllegalArgumentException("No reference allele found in VariantContext"); - - final long length = (stop - start) + 1; - if ( ! hasSymbolicAlleles() && length != getReference().length() ) { - throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this); - } } private void validateGenotypes() { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 70d365ef8..a8f956413 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -747,7 +747,7 @@ public class VariantContextUtils { if ( !mappedVCs.containsKey(vc.getType()) ) mappedVCs.put(vc.getType(), new ArrayList()); mappedVCs.get(vc.getType()).add(vc); - } + } } return mappedVCs; @@ -809,10 +809,10 @@ public class VariantContextUtils { // // refAllele: ACGTGA // myRef: ACGT - // myAlt: - + // myAlt: A // // We need to remap all of the alleles in vc to include the extra GA so that - // myRef => refAllele and myAlt => GA + // myRef => refAllele and myAlt => AGA // Allele myRef = vc.getReference(); @@ -1335,6 +1335,35 @@ public class VariantContextUtils { } } + public static boolean requiresPaddingBase(final List alleles) { + + // see whether one of the alleles would be null if trimmed through + + for ( final String allele : alleles ) { + if ( allele.isEmpty() ) + return true; + } + + int clipping = 0; + Character currentBase = null; + + while ( true ) { + for ( final String allele : alleles ) { + if ( allele.length() - clipping == 0 ) + return true; + + char myBase = allele.charAt(clipping); + if ( currentBase == null ) + currentBase = myBase; + else if ( currentBase != myBase ) + return false; + } + + clipping++; + currentBase = null; + } + } + public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) { // TODO - this function doesn't work with mixed records or records that started as mixed and then became non-mixed diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java index 7a849a819..80eda5ed9 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java @@ -23,7 +23,7 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest { testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF; testArgs += " --virtualPrimerSize 30"; WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, - Arrays.asList("27f9450afa132888a8994167f0035fd7")); + Arrays.asList("240d99b58f73985fb114abe9044c0271")); executeTest("Test probes", spec); } @@ -36,7 +36,7 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest { testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF; testArgs += " --virtualPrimerSize 30 --doNotUseBWA"; WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, - Arrays.asList("f2611ff1d9cd5bedaad003251fed8bc1")); + Arrays.asList("6e7789445e29d91979a21e78d3d53295")); executeTest("Test probes", spec); } @@ -49,7 +49,7 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest { testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF; testArgs += " --virtualPrimerSize 30 --filterMonomorphic"; WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, - Arrays.asList("77b3f30e38fedad812125bdf6cf3255f")); + Arrays.asList("18d7236208db603e143b40db06ef2aca")); executeTest("Test probes", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index bbee99ba6..3b60fa2c2 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -98,16 +98,16 @@ public class CombineVariantsIntegrationTest extends WalkerTest { @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "ac58a5fde17661e2a19004ca954d9781", " -setKey null"); } @Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "67a8076e30b4bca0ea5acdc9cd26a4e0"); } // official project VCF files in tabix format - @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "ef2d249ea4b25311966e038aac05c661"); } - @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "cdb448aaa92ca5a9e393d875b42581b3"); } + @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "909c6dc74eeb5ab86f8e74073eb0c1d6"); } + @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "f0c2cb3e3a6160e1ed0ee2fd9b120f55"); } @Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "f0ce3fb83d4ad9ba402d7cb11cd000c3"); } @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "4efdf983918db822e4ac13d911509576"); } // official project VCF files in tabix format @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "848d4408ee953053d2307cefebc6bd6d"); } // official project VCF files in tabix format - @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "91f6087e6e2bf3df4d1c9700eaff958b"); } + @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "4159a0c0d7c15852a3a545e0bea6bbc5"); } - @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "a9be239ab5e03e7e97caef58a3841dd2"); } + @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "61d0ded244895234ac727391f29f13a8"); } @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "0b1815c699e71e143ed129bfadaffbcb"); } diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java index e9b845d59..8f648344d 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java @@ -57,7 +57,7 @@ public class VCFIntegrationTest extends WalkerTest { String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s "; String test1 = baseCommand + "-T SelectVariants -V " + testVCF; - WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("0f82ac11852e7f958c1a0ce52398c2ae")); + WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("38697c195e7abf18d95dcc16c8e6d284")); executeTest("Test reading and writing samtools vcf", spec1); } @@ -66,7 +66,7 @@ public class VCFIntegrationTest extends WalkerTest { String testVCF = privateTestDir + "ex2.vcf"; String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s "; String test1 = baseCommand + "-T SelectVariants -V " + testVCF; - WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("9773d6a121cfcb18d090965bc520f120")); + WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("a04a0fc22fedb516c663e56e51fc1e27")); executeTest("Test writing samtools WEx BCF example", spec1); } From 2b1b00ade55e5bf21252a6be58bc69c81a7a50d3 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 27 Jul 2012 17:03:49 -0400 Subject: [PATCH 015/176] All integration tests and VC/Allele unit tests are passing --- ...elGenotypeLikelihoodsCalculationModel.java | 7 +--- .../sting/utils/variantcontext/Allele.java | 11 ++--- .../utils/variantcontext/AlleleUnitTest.java | 32 +------------- .../VariantContextUnitTest.java | 42 +++++++------------ 4 files changed, 22 insertions(+), 70 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoodsCalculationModel.java index a15c9d7da..1fef76116 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoodsCalculationModel.java @@ -42,7 +42,6 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi private static final int MAX_NUM_ALLELES_TO_GENOTYPE = 4; private PairHMMIndelErrorModel pairModel; - private boolean allelesArePadded = false; /* private static ThreadLocal>> indelLikelihoodMap = new ThreadLocal>>() { @@ -88,12 +87,10 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi final List allAllelesToUse){ - final Pair,Boolean> pair = IndelGenotypeLikelihoodsCalculationModel.getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC,true); - List alleles = pair.first; + List alleles = IndelGenotypeLikelihoodsCalculationModel.getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC,true); if (alleles.size() > MAX_NUM_ALLELES_TO_GENOTYPE) alleles = alleles.subList(0,MAX_NUM_ALLELES_TO_GENOTYPE); - allelesArePadded = pair.second; if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) { IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap().clear(); haplotypeMap.clear(); @@ -121,6 +118,6 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi protected int getEndLocation(final RefMetaDataTracker tracker, final ReferenceContext ref, final List allelesToUse) { - return IndelGenotypeLikelihoodsCalculationModel.computeEndLocation(allelesToUse, ref.getLocus(), allelesArePadded); + return ref.getLocus().getStart() + allelesToUse.get(0).length() - 1; } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java index aa63a9dac..2c312678e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java @@ -180,14 +180,9 @@ public class Allele implements Comparable { public static Allele extend(Allele left, byte[] right) { if (left.isSymbolic()) throw new IllegalArgumentException("Cannot extend a symbolic allele"); - byte[] bases; - if ( left.length() == 0 ) - bases = right; - else { - bases = new byte[left.length() + right.length]; - System.arraycopy(left.getBases(), 0, bases, 0, left.length()); - System.arraycopy(right, 0, bases, left.length(), right.length); - } + byte[] bases = new byte[left.length() + right.length]; + System.arraycopy(left.getBases(), 0, bases, 0, left.length()); + System.arraycopy(right, 0, bases, left.length(), right.length); return create(bases, left.isReference()); } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/AlleleUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/AlleleUnitTest.java index 3bf020df7..65398c373 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/AlleleUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/AlleleUnitTest.java @@ -47,13 +47,10 @@ import org.testng.annotations.Test; * Basic unit test for RecalData */ public class AlleleUnitTest { - Allele ARef, del, delRef, A, T, ATIns, ATCIns, NoCall; + Allele ARef, A, T, ATIns, ATCIns, NoCall; @BeforeSuite public void before() { - del = Allele.create("-"); - delRef = Allele.create("-", true); - A = Allele.create("A"); ARef = Allele.create("A", true); T = Allele.create("T"); @@ -99,14 +96,6 @@ public class AlleleUnitTest { Assert.assertEquals(ATCIns.length(), 3); Assert.assertEquals(ATIns.getBases(), "AT".getBytes()); Assert.assertEquals(ATCIns.getBases(), "ATC".getBytes()); - - Assert.assertTrue(del.isNonReference()); - Assert.assertFalse(delRef.isNonReference()); - Assert.assertFalse(del.isReference()); - Assert.assertTrue(delRef.isReference()); - Assert.assertFalse(del.basesMatch("-")); - Assert.assertTrue(del.basesMatch("")); - Assert.assertEquals(del.length(), 0); } @@ -122,18 +111,6 @@ public class AlleleUnitTest { Assert.assertFalse(a1.equals(a4)); } - @Test - public void testDelConstructors() { - Allele a1 = Allele.create("-"); - Allele a2 = Allele.create("-".getBytes()); - Allele a3 = Allele.create(""); - Allele a4 = Allele.create("", true); - - Assert.assertTrue(a1.equals(a2)); - Assert.assertTrue(a1.equals(a3)); - Assert.assertFalse(a1.equals(a4)); - } - @Test public void testInsConstructors() { Allele a1 = Allele.create("AC"); @@ -150,7 +127,6 @@ public class AlleleUnitTest { public void testEquals() { Assert.assertTrue(ARef.basesMatch(A)); Assert.assertFalse(ARef.equals(A)); - Assert.assertFalse(ARef.equals(del)); Assert.assertFalse(ARef.equals(ATIns)); Assert.assertFalse(ARef.equals(ATCIns)); @@ -158,11 +134,6 @@ public class AlleleUnitTest { Assert.assertFalse(T.basesMatch(A)); Assert.assertFalse(T.equals(A)); - Assert.assertTrue(del.basesMatch(del)); - Assert.assertTrue(del.basesMatch(delRef)); - Assert.assertTrue(del.equals(del)); - Assert.assertFalse(del.equals(delRef)); - Assert.assertTrue(ATIns.equals(ATIns)); Assert.assertFalse(ATIns.equals(ATCIns)); Assert.assertTrue(ATIns.basesMatch("AT")); @@ -203,7 +174,6 @@ public class AlleleUnitTest { public void testExtend() { Assert.assertEquals("AT", Allele.extend(Allele.create("A"), "T".getBytes()).toString()); Assert.assertEquals("ATA", Allele.extend(Allele.create("A"), "TA".getBytes()).toString()); - Assert.assertEquals("A", Allele.extend(Allele.create("-"), "A".getBytes()).toString()); Assert.assertEquals("A", Allele.extend(Allele.NO_CALL, "A".getBytes()).toString()); Assert.assertEquals("ATCGA", Allele.extend(Allele.create("AT"), "CGA".getBytes()).toString()); Assert.assertEquals("ATCGA", Allele.extend(Allele.create("ATC"), "GA".getBytes()).toString()); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java index 46153221d..272166c68 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java @@ -381,13 +381,13 @@ public class VariantContextUnitTest extends BaseTest { @Test public void testAccessingCompleteGenotypes() { - List alleles = Arrays.asList(Aref, T, del); + List alleles = Arrays.asList(Aref, T, ATC); Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref)); Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T)); Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T)); - Genotype g4 = GenotypeBuilder.create("Td", Arrays.asList(T, del)); - Genotype g5 = GenotypeBuilder.create("dd", Arrays.asList(del, del)); + Genotype g4 = GenotypeBuilder.create("Td", Arrays.asList(T, ATC)); + Genotype g5 = GenotypeBuilder.create("dd", Arrays.asList(ATC, ATC)); Genotype g6 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)); VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles) @@ -403,7 +403,7 @@ public class VariantContextUnitTest extends BaseTest { Assert.assertEquals(10, vc.getCalledChrCount()); Assert.assertEquals(3, vc.getCalledChrCount(Aref)); Assert.assertEquals(4, vc.getCalledChrCount(T)); - Assert.assertEquals(3, vc.getCalledChrCount(del)); + Assert.assertEquals(3, vc.getCalledChrCount(ATC)); Assert.assertEquals(2, vc.getCalledChrCount(Allele.NO_CALL)); } @@ -411,7 +411,7 @@ public class VariantContextUnitTest extends BaseTest { public void testAccessingRefGenotypes() { List alleles1 = Arrays.asList(Aref, T); List alleles2 = Arrays.asList(Aref); - List alleles3 = Arrays.asList(Aref, T, del); + List alleles3 = Arrays.asList(Aref, T); for ( List alleles : Arrays.asList(alleles1, alleles2, alleles3)) { Genotype g1 = GenotypeBuilder.create("AA1", Arrays.asList(Aref, Aref)); Genotype g2 = GenotypeBuilder.create("AA2", Arrays.asList(Aref, Aref)); @@ -433,7 +433,7 @@ public class VariantContextUnitTest extends BaseTest { @Test public void testFilters() { - List alleles = Arrays.asList(Aref, T, del); + List alleles = Arrays.asList(Aref, T); Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref)); Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T)); @@ -492,15 +492,15 @@ public class VariantContextUnitTest extends BaseTest { Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("AATAATA".getBytes()),7); - // -*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4 + // A*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4 VariantContext vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(nullR,atc)).make(); result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes); Assert.assertEquals(result.getFirst().toArray()[0],3); Assert.assertEquals(result.getFirst().toArray()[1],4); Assert.assertEquals(result.getSecond().length,3); - // ATC*,-,ATCATC - vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(ATCref,nullA,atcatc)).make(); + // ATC*,A,ATCATC + vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+3, Arrays.asList(Allele.create("AATC", true),nullA,atcatc)).make(); result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes); Assert.assertEquals(result.getFirst().toArray()[0],3); Assert.assertEquals(result.getFirst().toArray()[1],2); @@ -517,7 +517,7 @@ public class VariantContextUnitTest extends BaseTest { // CCCC*,CC,-,CCCCCC, context = CCC: (C)7 -> (C)5,(C)3,(C)9 refBytes = "TCCCCCCCAGAGAGAG".getBytes(); - vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(ccccR,cc, nullA,cccccc)).make(); + vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(ccccR,cc, nullA,cccccc)).make(); result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes); Assert.assertEquals(result.getFirst().toArray()[0],7); Assert.assertEquals(result.getFirst().toArray()[1],5); @@ -527,7 +527,7 @@ public class VariantContextUnitTest extends BaseTest { // GAGA*,-,GAGAGAGA refBytes = "TGAGAGAGAGATTT".getBytes(); - vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(gagaR, nullA,gagagaga)).make(); + vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(gagaR, nullA,gagagaga)).make(); result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes); Assert.assertEquals(result.getFirst().toArray()[0],5); Assert.assertEquals(result.getFirst().toArray()[1],3); @@ -559,27 +559,24 @@ public class VariantContextUnitTest extends BaseTest { @Test public void testVCFfromGenotypes() { - List alleles = Arrays.asList(Aref, T, del); + List alleles = Arrays.asList(Aref, T); Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref)); Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T)); Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T)); Genotype g4 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)); - Genotype g5 = GenotypeBuilder.create("--", Arrays.asList(del, del)); - VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make(); + VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4).make(); VariantContext vc12 = vc.subContextFromSamples(new HashSet(Arrays.asList(g1.getSampleName(), g2.getSampleName())), true); VariantContext vc1 = vc.subContextFromSamples(new HashSet(Arrays.asList(g1.getSampleName())), true); VariantContext vc23 = vc.subContextFromSamples(new HashSet(Arrays.asList(g2.getSampleName(), g3.getSampleName())), true); VariantContext vc4 = vc.subContextFromSamples(new HashSet(Arrays.asList(g4.getSampleName())), true); VariantContext vc14 = vc.subContextFromSamples(new HashSet(Arrays.asList(g1.getSampleName(), g4.getSampleName())), true); - VariantContext vc5 = vc.subContextFromSamples(new HashSet(Arrays.asList(g5.getSampleName())), true); Assert.assertTrue(vc12.isPolymorphicInSamples()); Assert.assertTrue(vc23.isPolymorphicInSamples()); Assert.assertTrue(vc1.isMonomorphicInSamples()); Assert.assertTrue(vc4.isMonomorphicInSamples()); Assert.assertTrue(vc14.isMonomorphicInSamples()); - Assert.assertTrue(vc5.isPolymorphicInSamples()); Assert.assertTrue(vc12.isSNP()); Assert.assertTrue(vc12.isVariant()); @@ -601,17 +598,11 @@ public class VariantContextUnitTest extends BaseTest { Assert.assertFalse(vc14.isVariant()); Assert.assertFalse(vc14.isBiallelic()); - Assert.assertTrue(vc5.isIndel()); - Assert.assertTrue(vc5.isSimpleDeletion()); - Assert.assertTrue(vc5.isVariant()); - Assert.assertTrue(vc5.isBiallelic()); - Assert.assertEquals(3, vc12.getCalledChrCount(Aref)); Assert.assertEquals(1, vc23.getCalledChrCount(Aref)); Assert.assertEquals(2, vc1.getCalledChrCount(Aref)); Assert.assertEquals(0, vc4.getCalledChrCount(Aref)); Assert.assertEquals(2, vc14.getCalledChrCount(Aref)); - Assert.assertEquals(0, vc5.getCalledChrCount(Aref)); } public void testGetGenotypeMethods() { @@ -659,13 +650,12 @@ public class VariantContextUnitTest extends BaseTest { @DataProvider(name = "getAlleles") public Object[][] mergeAllelesData() { new GetAllelesTest("A*", Aref); - new GetAllelesTest("-*", delRef); new GetAllelesTest("A*/C", Aref, C); new GetAllelesTest("A*/C/T", Aref, C, T); new GetAllelesTest("A*/T/C", Aref, T, C); - new GetAllelesTest("A*/C/T/-", Aref, C, T, del); - new GetAllelesTest("A*/T/C/-", Aref, T, C, del); - new GetAllelesTest("A*/-/T/C", Aref, del, T, C); + new GetAllelesTest("A*/C/T/ATC", Aref, C, T, ATC); + new GetAllelesTest("A*/T/C/ATC", Aref, T, C, ATC); + new GetAllelesTest("A*/ATC/T/C", Aref, ATC, T, C); return GetAllelesTest.getTests(GetAllelesTest.class); } From 99b15b2b3a761958690cb76b1796486350253caa Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Sun, 29 Jul 2012 01:07:59 -0400 Subject: [PATCH 016/176] Final checkpoint: all tests pass. Note that there were bugs in the PoolGenotypeLikelihoodsUnitTest that needed fixing and eventually led to my needing to disable one of the tests (with a note for Guillermo to look into it). Also note that while I have moved over the GATK to use the new non-null representation of Alleles, I didn't remove all of the now-superfluous code throughout to do padding checking on merges; we'll need to do this on a subsequent push. --- .../PoolGenotypeLikelihoodsUnitTest.java | 24 +-- .../ArtificialReadPileupTestProvider.java | 143 ++++++++---------- .../IndelGenotypeLikelihoodsUnitTest.java | 6 +- .../VariantContextUtilsUnitTest.java | 32 ++-- 4 files changed, 93 insertions(+), 112 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java index 9aab24998..74abb6b11 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java @@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import net.sf.samtools.SAMUtils; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.MathUtils; @@ -290,15 +289,17 @@ public class PoolGenotypeLikelihoodsUnitTest { } - @Test + // TODO -- Guillermo, this test cannot work because the ArtificialReadPileupTestProvider returns a position of chr1:5, which is less than + // TODO -- HAPLOTYPE_SIZE in IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles() so the HaplotypeMap is not populated. + @Test (enabled = false) public void testIndelErrorModel() { final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref"); final byte refByte = refPileupTestProvider.getRefByte(); - final String altBases = refByte + "TCA"; + final String altBases = (char)refByte + "TCA"; final String refSampleName = refPileupTestProvider.getSampleNames().get(0); final List trueAlleles = new ArrayList(); trueAlleles.add(Allele.create(refByte, true)); - trueAlleles.add(Allele.create(refByte + "TC", false)); + trueAlleles.add(Allele.create((char)refByte + "TC", false)); final String fw = new String(refPileupTestProvider.getReferenceContext().getForwardBases()); final VariantContext refInsertionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(), @@ -392,9 +393,6 @@ public class PoolGenotypeLikelihoodsUnitTest { final byte refByte = readPileupTestProvider.getRefByte(); final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T'; - final int refIdx = BaseUtils.simpleBaseToBaseIndex(refByte); - final int altIdx = BaseUtils.simpleBaseToBaseIndex(altByte); - final List allAlleles = new ArrayList(); // this contains only ref Allele up to now final Set laneIDs = new TreeSet(); laneIDs.add(GenotypeLikelihoodsCalculationModel.DUMMY_LANE); @@ -411,11 +409,17 @@ public class PoolGenotypeLikelihoodsUnitTest { for (String laneID : laneIDs) noisyErrorModels.put(laneID, Q30ErrorModel); + final int refIdx = 0; + int altIdx = 2; + + // ref allele must be first + allAlleles.add(Allele.create(refByte, true)); for (byte b: BaseUtils.BASES) { - if (refByte == b) - allAlleles.add(Allele.create(b,true)); - else + if (refByte != b) { + if (b == altByte) + altIdx = allAlleles.size(); allAlleles.add(Allele.create(b, false)); + } } PrintStream out = null; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java index a911718c1..17149220a 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java @@ -38,9 +38,6 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; import java.util.*; @@ -103,39 +100,27 @@ public class ArtificialReadPileupTestProvider { boolean addBaseErrors, int phredScaledBaseErrorRate) { // RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext); - ArrayList vcAlleles = new ArrayList(); - String refBase = refBases.substring(offset,offset+1); // referenceContext.getBase()? - Allele refAllele, altAllele; + String refAllele, altAllele; if (eventLength == 0) { // SNP case - refAllele = Allele.create(refBase,true); - altAllele = Allele.create(altBases.substring(0,1), false); + refAllele = new String(new byte[]{referenceContext.getBase()}); + altAllele = new String(altBases.substring(0,1)); } else if (eventLength>0){ // insertion - refAllele = Allele.create(refBase,true); - altAllele = Allele.create(refBase + altBases.substring(0,eventLength), false); + refAllele = ""; + altAllele = altBases.substring(0,eventLength); } else { // deletion - refAllele = Allele.create(refBases.substring(offset,offset+Math.abs(eventLength)),true); - altAllele = Allele.create(refBase, false); + refAllele = refBases.substring(offset,offset+Math.abs(eventLength)); + altAllele = ""; } - int stop = loc.getStart(); - vcAlleles.add(refAllele); - vcAlleles.add(altAllele); - - final VariantContextBuilder builder = new VariantContextBuilder().source(""); - builder.loc(loc.getContig(), loc.getStart(), stop); - builder.alleles(vcAlleles); - builder.noGenotypes(); - - final VariantContext vc = builder.make(); Map contexts = new HashMap(); for (String sample: sampleNames) { - AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc,vc, altBases, numReadsPerAllele, sample, addBaseErrors, phredScaledBaseErrorRate)); + AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc, refAllele, altAllele, altBases, numReadsPerAllele, sample, addBaseErrors, phredScaledBaseErrorRate)); contexts.put(sample,context); } @@ -149,73 +134,71 @@ public class ArtificialReadPileupTestProvider { rg.setSample(name); return rg; } - private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, VariantContext vc, String altBases, + + private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, String refAllele, String altAllele, String altBases, int[] numReadsPerAllele, String sample, boolean addErrors, int phredScaledErrorRate) { List pileupElements = new ArrayList(); - int readStart = contigStart; int offset = (contigStop-contigStart+1)/2; - int refAlleleLength = 0; - int readCounter = 0; - int alleleCounter = 0; - for (Allele allele: vc.getAlleles()) { - if (allele.isReference()) - refAlleleLength = allele.getBases().length; - - int alleleLength = allele.getBases().length; - - for ( int d = 0; d < numReadsPerAllele[alleleCounter]; d++ ) { - byte[] readBases = trueHaplotype(allele, offset, refAlleleLength); - if (addErrors) - addBaseErrors(readBases, phredScaledErrorRate); - - byte[] readQuals = new byte[readBases.length]; - Arrays.fill(readQuals, (byte)phredScaledErrorRate); - - GATKSAMRecord read = new GATKSAMRecord(header); - read.setBaseQualities(readQuals); - read.setReadBases(readBases); - read.setReadName(artificialReadName+readCounter++); - - boolean isBeforeDeletion = false, isBeforeInsertion = false; - if (allele.isReference()) - read.setCigarString(readBases.length + "M"); - else { - isBeforeDeletion = alleleLengthrefAlleleLength; - if (isBeforeDeletion || isBeforeInsertion) - read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") + - (readBases.length-offset)+"M"); - else // SNP case - read.setCigarString(readBases.length+"M"); - } - - int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0)); - read.setReadPairedFlag(false); - read.setAlignmentStart(readStart); - read.setMappingQuality(artificialMappingQuality); - read.setReferenceName(loc.getContig()); - read.setReadNegativeStrandFlag(false); - read.setAttribute("RG", sampleRG(sample).getReadGroupId()); - - - pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength)); - } - alleleCounter++; - } + int refAlleleLength = refAllele.length(); + pileupElements.addAll(createPileupElements(refAllele, loc, numReadsPerAllele[0], sample, contigStart, offset, altBases, addErrors, phredScaledErrorRate, refAlleleLength, true)); + pileupElements.addAll(createPileupElements(altAllele, loc, numReadsPerAllele[1], sample, contigStart, offset, altBases, addErrors, phredScaledErrorRate, refAlleleLength, false)); return new ReadBackedPileupImpl(loc,pileupElements); } - private byte[] trueHaplotype(Allele allele, int offset, int refAlleleLength) { + private List createPileupElements(String allele, GenomeLoc loc, int numReadsPerAllele, String sample, int readStart, int offset, String altBases, boolean addErrors, int phredScaledErrorRate, int refAlleleLength, boolean isReference) { + + int alleleLength = allele.length(); + List pileupElements = new ArrayList(); + + int readCounter = 0; + for ( int d = 0; d < numReadsPerAllele; d++ ) { + byte[] readBases = trueHaplotype(allele, offset, refAlleleLength); + if (addErrors) + addBaseErrors(readBases, phredScaledErrorRate); + + byte[] readQuals = new byte[readBases.length]; + Arrays.fill(readQuals, (byte)phredScaledErrorRate); + + GATKSAMRecord read = new GATKSAMRecord(header); + read.setBaseQualities(readQuals); + read.setReadBases(readBases); + read.setReadName(artificialReadName+readCounter++); + + boolean isBeforeDeletion = false, isBeforeInsertion = false; + if (isReference) + read.setCigarString(readBases.length + "M"); + else { + isBeforeDeletion = alleleLengthrefAlleleLength; + if (isBeforeDeletion || isBeforeInsertion) + read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") + + (readBases.length-offset)+"M"); + else // SNP case + read.setCigarString(readBases.length+"M"); + } + + int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0)); + read.setReadPairedFlag(false); + read.setAlignmentStart(readStart); + read.setMappingQuality(artificialMappingQuality); + read.setReferenceName(loc.getContig()); + read.setReadNegativeStrandFlag(false); + read.setAttribute("RG", sampleRG(sample).getReadGroupId()); + + + pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength)); + } + + return pileupElements; + } + + private byte[] trueHaplotype(String allele, int offset, int refAlleleLength) { // create haplotype based on a particular allele - String prefix = refBases.substring(offset); - String alleleBases = new String(allele.getBases()); + String prefix = refBases.substring(0, offset); String postfix = refBases.substring(offset+refAlleleLength,refBases.length()); - return (prefix+alleleBases+postfix).getBytes(); - - - + return (prefix+allele+postfix).getBytes(); } private void addBaseErrors(final byte[] readBases, final int phredScaledErrorRate) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java index c7ef51d0c..dfd4bc525 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java @@ -70,7 +70,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest { List alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases); Assert.assertEquals(alleles.size(),2); - Assert.assertEquals(alleles.get(1).getBaseString(), altBases.substring(0,eventLength)); + Assert.assertEquals(alleles.get(1).getBaseString().substring(1), altBases.substring(0,eventLength)); @@ -79,7 +79,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest { eventLength = 3; alleles = getConsensusAlleles(eventLength,false,10,0.1, altBases); Assert.assertEquals(alleles.size(),2); - Assert.assertEquals(alleles.get(0).getBaseString(), refBases.substring(pileupProvider.offset,pileupProvider.offset+eventLength)); + Assert.assertEquals(alleles.get(0).getBaseString().substring(1), refBases.substring(pileupProvider.offset,pileupProvider.offset+eventLength)); // same with min Reads = 11 alleles = getConsensusAlleles(eventLength,false,11,0.1, altBases); @@ -97,7 +97,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest { alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases); Assert.assertEquals(alleles.size(),2); - Assert.assertEquals(alleles.get(1).getBaseString(), altBases.substring(0,eventLength)); + Assert.assertEquals(alleles.get(1).getBaseString().substring(1), altBases.substring(0,eventLength)); altBases = "CCTCNTGAGA"; eventLength = 5; diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index 8c86a54de..8ba11db02 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -39,7 +39,7 @@ import java.io.FileNotFoundException; import java.util.*; public class VariantContextUtilsUnitTest extends BaseTest { - Allele Aref, T, C, delRef, Cref, ATC, ATCATC; + Allele Aref, T, C, Cref, ATC, ATCATC; private GenomeLocParser genomeLocParser; @BeforeSuite @@ -56,7 +56,6 @@ public class VariantContextUtilsUnitTest extends BaseTest { // alleles Aref = Allele.create("A", true); Cref = Allele.create("C", true); - delRef = Allele.create("-", true); T = Allele.create("T"); C = Allele.create("C"); ATC = Allele.create("ATC"); @@ -156,28 +155,23 @@ public class VariantContextUtilsUnitTest extends BaseTest { Arrays.asList(Aref, C), Arrays.asList(Aref, T, C)); // in order of appearence - // The following is actually a pathological case - there's no way on a vcf to represent a null allele that's non-variant. - // The code converts this (correctly) to a single-base non-variant vc with whatever base was there as a reference. - new MergeAllelesTest(Arrays.asList(delRef), - Arrays.asList(Cref)); + new MergeAllelesTest(Arrays.asList(Aref), + Arrays.asList(Aref, ATC), + Arrays.asList(Aref, ATC)); - new MergeAllelesTest(Arrays.asList(delRef), - Arrays.asList(delRef, ATC), - Arrays.asList(delRef, ATC)); - - new MergeAllelesTest(Arrays.asList(delRef), - Arrays.asList(delRef, ATC, ATCATC), - Arrays.asList(delRef, ATC, ATCATC)); + new MergeAllelesTest(Arrays.asList(Aref), + Arrays.asList(Aref, ATC, ATCATC), + Arrays.asList(Aref, ATC, ATCATC)); // alleles in the order we see them - new MergeAllelesTest(Arrays.asList(delRef, ATCATC), - Arrays.asList(delRef, ATC, ATCATC), - Arrays.asList(delRef, ATCATC, ATC)); + new MergeAllelesTest(Arrays.asList(Aref, ATCATC), + Arrays.asList(Aref, ATC, ATCATC), + Arrays.asList(Aref, ATCATC, ATC)); // same - new MergeAllelesTest(Arrays.asList(delRef, ATC), - Arrays.asList(delRef, ATCATC), - Arrays.asList(delRef, ATC, ATCATC)); + new MergeAllelesTest(Arrays.asList(Aref, ATC), + Arrays.asList(Aref, ATCATC), + Arrays.asList(Aref, ATC, ATCATC)); return MergeAllelesTest.getTests(MergeAllelesTest.class); } From c4ae9c6cfbd48233e6756654fd5dc837d51efa3c Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Sun, 29 Jul 2012 19:22:02 -0400 Subject: [PATCH 017/176] With the new Allele representation we can finally handle complex events (because they aren't so complex anymore). One place this manifests itself is with the strict VCF validation (ValidateVariants used to skip these events but doesn't anymore) so I've added a new test with complex events to the VV integration test. --- .../ValidateVariantsIntegrationTest.java | 10 ++++++++ .../sting/utils/HaplotypeUnitTest.java | 24 +++++++++---------- .../VariantJEXLContextUnitTest.java | 5 +--- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java index 3277f5060..6a3d755d7 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java @@ -125,4 +125,14 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { executeTest("test bad ref allele in deletion", spec); } + @Test + public void testComplexEvents() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString("complexEvents.vcf", "ALL"), + 0, + Arrays.asList("d41d8cd98f00b204e9800998ecf8427e") + ); + + executeTest("test validating complex events", spec); + } } diff --git a/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java index ec08d97c5..161eefa8f 100644 --- a/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java @@ -53,11 +53,11 @@ public class HaplotypeUnitTest extends BaseTest { h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M)); final Cigar h1Cigar = new Cigar(h1CigarList); String h1bases = "AACTTCTGGTCAACTGGTCAACTGGTCAACTGGTCA"; - basicInsertTest("-", "ACTT", 1, h1Cigar, bases, h1bases); + basicInsertTest("A", "AACTT", 1, h1Cigar, bases, h1bases); h1bases = "ACTGGTCACTTAACTGGTCAACTGGTCAACTGGTCA"; - basicInsertTest("-", "ACTT", 7, h1Cigar, bases, h1bases); + basicInsertTest("A", "AACTT", 7, h1Cigar, bases, h1bases); h1bases = "ACTGGTCAACTGGTCAAACTTCTGGTCAACTGGTCA"; - basicInsertTest("-", "ACTT", 17, h1Cigar, bases, h1bases); + basicInsertTest("A", "AACTT", 17, h1Cigar, bases, h1bases); } @Test @@ -68,11 +68,11 @@ public class HaplotypeUnitTest extends BaseTest { h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M)); final Cigar h1Cigar = new Cigar(h1CigarList); String h1bases = "ATCAACTGGTCAACTGGTCAACTGGTCA"; - basicInsertTest("ACTT", "-", 1, h1Cigar, bases, h1bases); + basicInsertTest("AACTT", "A", 1, h1Cigar, bases, h1bases); h1bases = "ACTGGTCGGTCAACTGGTCAACTGGTCA"; - basicInsertTest("ACTT", "-", 7, h1Cigar, bases, h1bases); + basicInsertTest("AACTT", "A", 7, h1Cigar, bases, h1bases); h1bases = "ACTGGTCAACTGGTCAATCAACTGGTCA"; - basicInsertTest("ACTT", "-", 17, h1Cigar, bases, h1bases); + basicInsertTest("AACTT", "A", 17, h1Cigar, bases, h1bases); } @Test @@ -102,11 +102,11 @@ public class HaplotypeUnitTest extends BaseTest { h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M)); final Cigar h1Cigar = new Cigar(h1CigarList); String h1bases = "AACTTTCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; - basicInsertTest("-", "ACTT", 1, h1Cigar, bases, h1bases); + basicInsertTest("A", "AACTT", 1, h1Cigar, bases, h1bases); h1bases = "ATCG" + "CCGGCCGGCC" + "ATCACTTGATCG" + "AGGGGGA" + "AGGC"; - basicInsertTest("-", "ACTT", 7, h1Cigar, bases, h1bases); + basicInsertTest("A", "AACTT", 7, h1Cigar, bases, h1bases); h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGACTTGGGGA" + "AGGC"; - basicInsertTest("-", "ACTT", 17, h1Cigar, bases, h1bases); + basicInsertTest("A", "AACTT", 17, h1Cigar, bases, h1bases); } @Test @@ -121,11 +121,11 @@ public class HaplotypeUnitTest extends BaseTest { h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M)); final Cigar h1Cigar = new Cigar(h1CigarList); String h1bases = "A" + "CGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; - basicInsertTest("ACTT", "-", 1, h1Cigar, bases, h1bases); + basicInsertTest("AACTT", "A", 1, h1Cigar, bases, h1bases); h1bases = "ATCG" + "CCGGCCGGCC" + "ATCG" + "AGGGGGA" + "AGGC"; - basicInsertTest("ACTT", "-", 7, h1Cigar, bases, h1bases); + basicInsertTest("AACTT", "A", 7, h1Cigar, bases, h1bases); h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGA" + "AGGC"; - basicInsertTest("ACTT", "-", 17, h1Cigar, bases, h1bases); + basicInsertTest("AACTT", "A", 17, h1Cigar, bases, h1bases); } @Test diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContextUnitTest.java index 6f5756bdc..8f03f1d38 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContextUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContextUnitTest.java @@ -56,7 +56,7 @@ public class VariantJEXLContextUnitTest extends BaseTest { Allele A, Aref, T, Tref; - Allele del, delRef, ATC, ATCref; + Allele ATC, ATCref; // A [ref] / T at 10 GenomeLoc snpLoc; @@ -84,9 +84,6 @@ public class VariantJEXLContextUnitTest extends BaseTest { @BeforeMethod public void before() { - del = Allele.create("-"); - delRef = Allele.create("-", true); - A = Allele.create("A"); Aref = Allele.create("A", true); T = Allele.create("T"); From b07bf1950b639e34295411b1826ec6e758e3f17a Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Sun, 29 Jul 2012 22:19:49 -0400 Subject: [PATCH 018/176] Adding an integration test for another feature that I snuck in during a previous commit: we now allow lower-case bases in the REF/ALT alleles of a VCF and upper-case them (this had been turned off because the previous version used Strings to do the uppercasing whereas we stick with byte operations now). --- .../sting/utils/codecs/vcf/VCFIntegrationTest.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java index 8f648344d..3948ba971 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java @@ -39,6 +39,17 @@ public class VCFIntegrationTest extends WalkerTest { executeTest("Test reading and writing breakpoint VCF", spec1); } + @Test(enabled = true) + public void testReadingLowerCaseBases() { + String testVCF = privateTestDir + "lowercaseBases.vcf"; + + String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s "; + + String test1 = baseCommand + "-T SelectVariants -V " + testVCF; + WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("e0e308a25e56bde1c664139bb44ed19d")); + executeTest("Test reading VCF with lower-case bases", spec1); + } + @Test(enabled = true) public void testReadingAndWriting1000GSVs() { String testVCF = privateTestDir + "1000G_SVs.chr1.vcf"; From 7630c929a7fb282843c70e2d27bc8ca57f2dbaec Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Sun, 29 Jul 2012 22:24:56 -0400 Subject: [PATCH 019/176] Re-enabling the unit tests for reverse allele clipping --- .../VariantContextUtilsUnitTest.java | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index 8ba11db02..95e8458c8 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -655,4 +655,52 @@ public class VariantContextUtilsUnitTest extends BaseTest { // test alleles are equal Assert.assertEquals(VariantContextUtils.isTandemRepeat(cfg.vc, cfg.ref.getBytes()), cfg.isTrueRepeat); } + + // -------------------------------------------------------------------------------- + // + // basic allele clipping test + // + // -------------------------------------------------------------------------------- + + private class ReverseClippingPositionTestProvider extends TestDataProvider { + final String ref; + final List alleles = new ArrayList(); + final int expectedClip; + + private ReverseClippingPositionTestProvider(final int expectedClip, final String ref, final String... alleles) { + super(ReverseClippingPositionTestProvider.class); + this.ref = ref; + for ( final String allele : alleles ) + this.alleles.add(Allele.create(allele)); + this.expectedClip = expectedClip; + } + + @Override + public String toString() { + return String.format("ref=%s allele=%s reverse clip %d", ref, alleles, expectedClip); + } + } + + @DataProvider(name = "ReverseClippingPositionTestProvider") + public Object[][] makeReverseClippingPositionTestProvider() { + // pair clipping + new ReverseClippingPositionTestProvider(0, "ATT", "CCG"); + new ReverseClippingPositionTestProvider(1, "ATT", "CCT"); + new ReverseClippingPositionTestProvider(2, "ATT", "CTT"); + new ReverseClippingPositionTestProvider(2, "ATT", "ATT"); // cannot completely clip allele + + // triplets + new ReverseClippingPositionTestProvider(0, "ATT", "CTT", "CGG"); + new ReverseClippingPositionTestProvider(1, "ATT", "CTT", "CGT"); // the T can go + new ReverseClippingPositionTestProvider(2, "ATT", "CTT", "CTT"); // both Ts can go + + return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class); + } + + + @Test(dataProvider = "ReverseClippingPositionTestProvider") + public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) { + int result = VariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false); + Assert.assertEquals(result, cfg.expectedClip); + } } From 5b9a1af7febf01463a409e15c8d9844b26e3ce4f Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Mon, 30 Jul 2012 09:56:10 -0400 Subject: [PATCH 020/176] Intermediate fix for pool GL unit test: fix up artificial read pileup provider to give consistent data. b) Increase downsampling in pool integration tests with reference sample, and shorten MT tests so they don't last too long --- .../gatk/walkers/genotyper/ErrorModel.java | 11 ++-- .../genotyper/PoolSNPGenotypeLikelihoods.java | 8 ++- .../genotyper/PoolCallerIntegrationTest.java | 20 +++---- .../PoolGenotypeLikelihoodsUnitTest.java | 21 +++++-- .../ArtificialReadPileupTestProvider.java | 56 +++++++++++-------- 5 files changed, 70 insertions(+), 46 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java index 864414de9..8e4ca9595 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java @@ -68,10 +68,10 @@ public class ErrorModel { break; } } + haplotypeMap = new LinkedHashMap(); if (refSampleVC.isIndel()) { pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY, UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION); - haplotypeMap = new LinkedHashMap(); indelLikelihoodMap = new HashMap>(); IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(refSampleVC.getAlleles(), refContext, refContext.getLocus(), haplotypeMap); // will update haplotypeMap adding elements } @@ -96,7 +96,8 @@ public class ErrorModel { final int readCounts[] = new int[refSamplePileup.getNumberOfElements()]; //perReadLikelihoods = new double[readCounts.length][refSampleVC.getAlleles().size()]; final int eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(refSampleVC.getAlleles()); - perReadLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(refSamplePileup,haplotypeMap,refContext, eventLength, indelLikelihoodMap, readCounts); + if (!haplotypeMap.isEmpty()) + perReadLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(refSamplePileup,haplotypeMap,refContext, eventLength, indelLikelihoodMap, readCounts); } int idx = 0; for (PileupElement refPileupElement : refSamplePileup) { @@ -108,7 +109,7 @@ public class ErrorModel { if (DEBUG) System.out.println(m); isMatch |= m; } - if (refSampleVC.isIndel()) { + if (refSampleVC.isIndel() && !haplotypeMap.isEmpty()) { // ignore match/mismatch if reads, as determined by their likelihood, are not informative double[] perAlleleLikelihoods = perReadLikelihoods[idx++]; if (!isInformativeElement(perAlleleLikelihoods)) @@ -173,10 +174,10 @@ public class ErrorModel { // if test allele is ref, any base mismatch, or any insertion/deletion at start of pileup count as mismatch if (allele.isReference()) { // for a ref allele, any base mismatch or new indel is a mismatch. - if(allele.getBases().length>0 ) + if(allele.getBases().length>0) // todo - can't check vs. allele because allele is not padded so it doesn't include the reference base at this location // could clean up/simplify this when unpadding is removed - return (pileupElement.getBase() == refBase); + return (pileupElement.getBase() == refBase && !pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart()); else // either null allele to compare, or ref/alt lengths are different (indel by definition). // if we have an indel that we are comparing against a REF allele, any indel presence (of any length/content) is a mismatch diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolSNPGenotypeLikelihoods.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolSNPGenotypeLikelihoods.java index 1e445270f..f763392ae 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolSNPGenotypeLikelihoods.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolSNPGenotypeLikelihoods.java @@ -5,6 +5,7 @@ import net.sf.samtools.SAMUtils; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -48,7 +49,12 @@ public class PoolSNPGenotypeLikelihoods extends PoolGenotypeLikelihoods/* implem myAlleles = new ArrayList(alleles); - refByte = alleles.get(0).getBases()[0]; // by construction, first allele in list is always ref! + Allele refAllele = alleles.get(0); + //sanity check: by construction, first allele should ALWAYS be the reference alleles + if (!refAllele.isReference()) + throw new ReviewedStingException("BUG: First allele in list passed to PoolSNPGenotypeLikelihoods should be reference!"); + + refByte = refAllele.getBases()[0]; // by construction, first allele in list is always ref! if (myAlleles.size() < BaseUtils.BASES.length) { // likelihood only defined for subset of possible alleles. Fill then with other alleles to have all possible ones, diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolCallerIntegrationTest.java index c6b1a8b7f..a2b478b0e 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolCallerIntegrationTest.java @@ -18,27 +18,27 @@ public class PoolCallerIntegrationTest extends WalkerTest { final String LSV_BAM = validationDataLocation +"93pools_NA12878_ref_chr20_40m_41m.bam"; final String REFSAMPLE_MT_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12878.snp.vcf"; final String REFSAMPLE_NAME = "NA12878"; - final String MTINTERVALS = "MT"; + final String MTINTERVALS = "MT:1-3000"; final String LSVINTERVALS = "20:40,000,000-41,000,000"; final String NA12891_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12891.snp.vcf"; final String NA12878_WG_CALLS = comparisonDataLocation + "Unvalidated/NA12878/CEUTrio.HiSeq.WGS.b37_decoy.recal.ts_95.snp_indel_combined.vcf"; final String LSV_ALLELES = validationDataLocation + "ALL.chr20_40m_41m.largeScaleValidationSites.vcf"; private void PC_MT_Test(String bam, String args, String name, String md5) { - final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm POOLSNP -ignoreLane -pnrm POOL", + final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm POOLSNP -ignoreLane -pnrm POOL", REF, bam, MTINTERVALS, REFSAMPLE_MT_CALLS, REFSAMPLE_NAME) + " --no_cmdline_in_header -o %s"; final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); executeTest("testPoolCaller:"+name+" args=" + args, spec); } private void PC_LSV_Test(String args, String name, String model, String md5) { - final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane -pnrm POOL", + final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane -pnrm POOL", REF, LSV_BAM, LSVINTERVALS, NA12878_WG_CALLS, REFSAMPLE_NAME, model) + " --no_cmdline_in_header -o %s"; final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); executeTest("testPoolCaller:"+name+" args=" + args, spec); } private void PC_LSV_Test_NoRef(String args, String name, String model, String md5) { - final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s -glm %s -ignoreLane -pnrm POOL", + final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s -glm %s -ignoreLane -pnrm POOL", REF, LSV_BAM, LSVINTERVALS, model) + " --no_cmdline_in_header -o %s"; final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); executeTest("testPoolCaller:"+name+" args=" + args, spec); @@ -46,33 +46,33 @@ public class PoolCallerIntegrationTest extends WalkerTest { @Test public void testBOTH_GGA_Pools() { - PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","POOLBOTH","36b8db57f65be1cc3d2d9d7f9f3f26e4"); + PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","POOLBOTH","d8cba4ec4267d7d766081fcead845d08"); } @Test public void testINDEL_GGA_Pools() { - PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","POOLINDEL","d1339990291648495bfcf4404f051478"); + PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","POOLINDEL","8e9b7e89c439b430e95b146a7540c72e"); } @Test public void testINDEL_maxAlleles2_ploidy3_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","POOLINDEL","b66e7150603310fd57ee7bf9fc590706"); + PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","POOLINDEL","96087fe9240e3656cc2a4e0ff0174d5b"); } @Test public void testINDEL_maxAlleles2_ploidy1_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","POOLINDEL","ccdae3fc4d2c922f956a186aaad51c29"); + PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","POOLINDEL","6fdae7093831ecfc82a06dd707d62fe9"); } @Test public void testMT_SNP_DISCOVERY_sp4() { - PC_MT_Test(CEUTRIO_BAM, " -maxAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","fa5ee7c957c473a80f3a7f3c35dc80b5"); + PC_MT_Test(CEUTRIO_BAM, " -maxAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","6b27634214530d379db70391a9cfc2d7"); } @Test public void testMT_SNP_GGA_sp10() { - PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "6907c8617d49bb57b33f8704ce7f0323"); + PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "e74d4c73ece45d7fb676b99364df4f1a"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java index c97c4ed28..9a785acda 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java @@ -392,8 +392,6 @@ public class PoolGenotypeLikelihoodsUnitTest { final byte refByte = readPileupTestProvider.getRefByte(); final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T'; - final int refIdx = BaseUtils.simpleBaseToBaseIndex(refByte); - final int altIdx = BaseUtils.simpleBaseToBaseIndex(altByte); final List allAlleles = new ArrayList(); // this contains only ref Allele up to now final Set laneIDs = new TreeSet(); @@ -411,17 +409,28 @@ public class PoolGenotypeLikelihoodsUnitTest { for (String laneID : laneIDs) noisyErrorModels.put(laneID, Q30ErrorModel); + // all first ref allele + allAlleles.add(Allele.create(refByte,true)); for (byte b: BaseUtils.BASES) { - if (refByte == b) - allAlleles.add(Allele.create(b,true)); - else + if (refByte != b) allAlleles.add(Allele.create(b, false)); } + final int refIdx = 0; + int altIdx = -1; + + for (int k=0; k < allAlleles.size(); k++) + if (altByte == allAlleles.get(k).getBases()[0]) { + altIdx = k; + break; + } + + + PrintStream out = null; if (SIMULATE_NOISY_PILEUP) { try { - out = new PrintStream(new File("/humgen/gsa-scr1/delangel/GATK/Sting_unstable_mac/GLUnitTest.table")); + out = new PrintStream(new File("GLUnitTest.table")); // out = new PrintStream(new File("/Users/delangel/GATK/Sting_unstable/GLUnitTest.table")); } catch (Exception e) {} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java index 77769a5fe..8011ea66b 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java @@ -62,9 +62,9 @@ public class ArtificialReadPileupTestProvider { List sampleNames = new ArrayList(); private String sampleName(int i) { return sampleNames.get(i); } private SAMReadGroupRecord sampleRG(String name) { return sample2RG.get(name); } - public final int offset = 5; + public final int locStart = 5; // 1-based public final GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); - public final GenomeLoc loc = genomeLocParser.createGenomeLoc(artificialContig,offset,offset); + public final GenomeLoc loc = genomeLocParser.createGenomeLoc(artificialContig,locStart,locStart); //1-based public final GenomeLoc window = genomeLocParser.createGenomeLoc(artificialContig,artificialRefStart,10); public final ReferenceContext referenceContext = new ReferenceContext(genomeLocParser,loc,window,this.refBases.getBytes()); @@ -103,22 +103,22 @@ public class ArtificialReadPileupTestProvider { boolean addBaseErrors, int phredScaledBaseErrorRate) { // RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext); - + String refStr = new String(new byte[]{referenceContext.getBase()}); ArrayList vcAlleles = new ArrayList(); Allele refAllele, altAllele; if (eventLength == 0) {// SNP case - refAllele =Allele.create(referenceContext.getBase(),true); + refAllele =Allele.create(refStr,true); altAllele = Allele.create(altBases.substring(0,1), false); } else if (eventLength>0){ // insertion - refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true); - altAllele = Allele.create(altBases.substring(0,eventLength), false); + refAllele = Allele.create(refStr, true); + altAllele = Allele.create(refStr+altBases.substring(0,eventLength), false); } else { // deletion - refAllele =Allele.create(refBases.substring(offset,offset+Math.abs(eventLength)),true); - altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false); + refAllele =Allele.create(refBases.substring(locStart-1,locStart+Math.abs(eventLength)-1),true); + altAllele = Allele.create(refBases.substring(locStart-1,locStart), false); } int stop = loc.getStart(); vcAlleles.add(refAllele); @@ -153,18 +153,15 @@ public class ArtificialReadPileupTestProvider { int[] numReadsPerAllele, String sample, boolean addErrors, int phredScaledErrorRate) { List pileupElements = new ArrayList(); int readStart = contigStart; - int offset = (contigStop-contigStart+1)/2; - int refAlleleLength = 0; + + int refAlleleLength = vc.getReference().getBases().length; int readCounter = 0; int alleleCounter = 0; for (Allele allele: vc.getAlleles()) { - if (allele.isReference()) - refAlleleLength = allele.getBases().length; - int alleleLength = allele.getBases().length; for ( int d = 0; d < numReadsPerAllele[alleleCounter]; d++ ) { - byte[] readBases = trueHaplotype(allele, offset, refAlleleLength); + byte[] readBases = trueHaplotype(allele, locStart, vc.getReference()); if (addErrors) addBaseErrors(readBases, phredScaledErrorRate); @@ -176,20 +173,20 @@ public class ArtificialReadPileupTestProvider { read.setReadBases(readBases); read.setReadName(artificialReadName+readCounter++); - boolean isBeforeDeletion = false, isBeforeInsertion = false; + boolean isBeforeDeletion = alleleLengthrefAlleleLength; + + int eventLength = alleleLength - refAlleleLength; if (allele.isReference()) read.setCigarString(readBases.length + "M"); else { - isBeforeDeletion = alleleLengthrefAlleleLength; if (isBeforeDeletion || isBeforeInsertion) - read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") + - (readBases.length-offset)+"M"); + read.setCigarString(locStart+"M"+ eventLength + (isBeforeDeletion?"D":"I") + + (readBases.length-locStart)+"M"); else // SNP case read.setCigarString(readBases.length+"M"); } - int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0)); read.setReadPairedFlag(false); read.setAlignmentStart(readStart); read.setMappingQuality(artificialMappingQuality); @@ -198,7 +195,7 @@ public class ArtificialReadPileupTestProvider { read.setAttribute("RG", sampleRG(sample).getReadGroupId()); - pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength)); + pileupElements.add(new PileupElement(read,locStart-1,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength-1),eventLength)); } alleleCounter++; } @@ -206,11 +203,22 @@ public class ArtificialReadPileupTestProvider { return new ReadBackedPileupImpl(loc,pileupElements); } - private byte[] trueHaplotype(Allele allele, int offset, int refAlleleLength) { + /** + * create haplotype based on a particular allele + * @param allele Allele of interest. ASSUMED TO INCLUDE REF BASE AT startPosition! + * @param startPosition 1-based start position of allele + * @param refAllele REF allele + * @return + */ + private byte[] trueHaplotype(Allele allele, int startPosition, Allele refAllele) { + // create haplotype based on a particular allele - String prefix = refBases.substring(offset); + // startPosition is 1-based. + // so, if startPosition == 5, we need to include positions 1 to 4 , or indeces 0 to 3 of string + String prefix = refBases.substring(0,startPosition-1); String alleleBases = new String(allele.getBases()); - String postfix = refBases.substring(offset+refAlleleLength,refBases.length()); + // where to start postfix? We have (startPosition-1) prefix bases + refAllele.length bases before postfix + String postfix = refBases.substring(startPosition -1 + refAllele.getBases().length,refBases.length()); return (prefix+alleleBases+postfix).getBytes(); From 0b30588d67ad72177b5d1ecb9d0553b0c9832b17 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 30 Jul 2012 11:59:56 -0400 Subject: [PATCH 021/176] Catch yet another class of User Errors --- .../src/org/broadinstitute/sting/gatk/CommandLineGATK.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java index b1ad19e69..306ebdd0e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java @@ -131,6 +131,12 @@ public class CommandLineGATK extends CommandLineExecutable { // can't close tribble index when writing if ( message.indexOf("Unable to close index for") != -1 ) exitSystemWithUserError(new UserException(t.getCause().getMessage())); + + // disk is full + if ( message.indexOf("No space left on device") != -1 ) + exitSystemWithUserError(new UserException(t.getMessage())); + if ( t.getCause().getMessage().indexOf("No space left on device") != -1 ) + exitSystemWithUserError(new UserException(t.getCause().getMessage())); } /** From 7a73042cd3be4deff38f2164798d70601fa294ac Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Mon, 30 Jul 2012 12:09:23 -0400 Subject: [PATCH 022/176] Bug fix for the case of merging two VCs when a deletion deletes the padding base for a consecutive indel. Added unit test to cover this case. --- .../gatk/walkers/haplotypecaller/GenotypingEngine.java | 9 +++++---- .../haplotypecaller/GenotypingEngineUnitTest.java | 10 ++++++++++ .../HaplotypeCallerIntegrationTest.java | 6 +----- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index 678a65024..4f18ece7b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -423,16 +423,17 @@ public class GenotypingEngine { protected static VariantContext createMergedVariantContext( final VariantContext thisVC, final VariantContext nextVC, final byte[] ref, final GenomeLoc refLoc ) { final int thisStart = thisVC.getStart(); final int nextStart = nextVC.getStart(); - byte[] refBases = ( new byte[]{} ); - byte[] altBases = ( new byte[]{} ); + byte[] refBases = new byte[]{}; + byte[] altBases = new byte[]{}; refBases = ArrayUtils.addAll(refBases, thisVC.getReference().getBases()); altBases = ArrayUtils.addAll(altBases, thisVC.getAlternateAllele(0).getBases()); - for( int locus = thisStart + refBases.length; locus < nextStart; locus++ ) { + int locus; + for( locus = thisStart + refBases.length; locus < nextStart; locus++ ) { final byte refByte = ref[locus - refLoc.getStart()]; refBases = ArrayUtils.add(refBases, refByte); altBases = ArrayUtils.add(altBases, refByte); } - refBases = ArrayUtils.addAll(refBases, nextVC.getReference().getBases()); + refBases = ArrayUtils.addAll(refBases, ArrayUtils.subarray(nextVC.getReference().getBases(), locus > nextStart ? 1 : 0, nextVC.getReference().getBases().length)); // special case of deletion including the padding base of consecutive indel altBases = ArrayUtils.addAll(altBases, nextVC.getAlternateAllele(0).getBases()); int iii = 0; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java index 4bcf5a0a0..539190fe9 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java @@ -353,6 +353,16 @@ public class GenotypingEngineUnitTest extends BaseTest { Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); + // deletion + insertion (abutting) + thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make(); + nextVC = new VariantContextBuilder().loc("2", 1702, 1702).alleles("T","GCGCGC").make(); + truthVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","AGCGCGC").source("merged").make(); + mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); + logger.warn(truthVC + " == " + mergedVC); + Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); + Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); + Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); + // complex + complex thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","AAA").make(); nextVC = new VariantContextBuilder().loc("2", 1706, 1707).alleles("GG","AC").make(); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 9b8d1b3d7..9b149e8d1 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -30,10 +30,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { - // TODO -- Ryan, do you know why the md5s changed just for the rank sum tests? - final String RyansMd5 = "ff370c42c8b09a29f1aeff5ac57c7ea6"; - final String EricsMd5 = "d8317f4589e8e0c48bcd087cdb75ce88"; - HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", EricsMd5); + HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "d8317f4589e8e0c48bcd087cdb75ce88"); } private void HCTestComplexVariants(String bam, String args, String md5) { @@ -46,6 +43,5 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void testHaplotypeCallerMultiSampleComplex() { HCTestComplexVariants(CEUTRIO_BAM, "", "6f9fda3ea82c5696bed1d48ee90cd76b"); } - } From 48b9495460d6d4ebd16885d8cb74ffbb1cb1df5f Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Mon, 30 Jul 2012 12:12:56 -0400 Subject: [PATCH 023/176] Fixes to the likelihood based LD calculation for deciding when to combine consecutive events. --- .../haplotypecaller/GenotypingEngine.java | 39 ++++++++++--------- .../haplotypecaller/HaplotypeCaller.java | 6 +-- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index 6ea735ec0..9df92ba7a 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -334,10 +334,10 @@ public class GenotypingEngine { boolean isBiallelic = true; VariantContext thisVC = null; VariantContext nextVC = null; - int x11 = 0; - int x12 = 0; - int x21 = 0; - int x22 = 0; + double x11 = Double.NEGATIVE_INFINITY; + double x12 = Double.NEGATIVE_INFINITY; + double x21 = Double.NEGATIVE_INFINITY; + double x22 = Double.NEGATIVE_INFINITY; for( final Haplotype h : haplotypes ) { // only make complex substitutions out of consecutive biallelic sites @@ -360,13 +360,17 @@ public class GenotypingEngine { } } // count up the co-occurrences of the events for the R^2 calculation - // BUGBUG: use haplotype likelihoods per-sample to make this more accurate - if( thisHapVC == null ) { - if( nextHapVC == null ) { x11++; } - else { x12++; } - } else { - if( nextHapVC == null ) { x21++; } - else { x22++; } + final ArrayList haplotypeList = new ArrayList(); + haplotypeList.add(h); + for( final String sample : haplotypes.get(0).getSampleKeySet() ) { + final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( haplotypeList, sample )[0][0]; + if( thisHapVC == null ) { + if( nextHapVC == null ) { x11 = MathUtils.approximateLog10SumLog10(x11, haplotypeLikelihood); } + else { x12 = MathUtils.approximateLog10SumLog10(x12, haplotypeLikelihood); } + } else { + if( nextHapVC == null ) { x21 = MathUtils.approximateLog10SumLog10(x21, haplotypeLikelihood); } + else { x22 = MathUtils.approximateLog10SumLog10(x22, haplotypeLikelihood); } + } } } if( thisVC == null || nextVC == null ) { @@ -374,7 +378,7 @@ public class GenotypingEngine { //throw new ReviewedStingException("StartPos TreeSet has an entry for an event that is found on no haplotype. start pos = " + thisStart + ", next pos = " + nextStart); } if( isBiallelic ) { - final double R2 = calculateR2LD( x11, x12, x21, x22 ); + final double R2 = calculateR2LD( Math.pow(10.0, x11), Math.pow(10.0, x12), Math.pow(10.0, x21), Math.pow(10.0, x22) ); if( DEBUG ) { System.out.println("Found consecutive biallelic events with R^2 = " + String.format("%.4f", R2)); System.out.println("-- " + thisVC); @@ -445,12 +449,11 @@ public class GenotypingEngine { return new VariantContextBuilder("merged", thisVC.getChr(), thisVC.getStart() + iii, nextVC.getEnd(), mergedAlleles).make(); } - @Requires({"x11 >= 0", "x12 >= 0", "x21 >= 0", "x22 >= 0"}) - protected static double calculateR2LD( final int x11, final int x12, final int x21, final int x22 ) { - final int total = x11 + x12 + x21 + x22; - final double pa1b1 = ((double) x11) / ((double) total); - final double pa1b2 = ((double) x12) / ((double) total); - final double pa2b1 = ((double) x21) / ((double) total); + protected static double calculateR2LD( final double x11, final double x12, final double x21, final double x22 ) { + final double total = x11 + x12 + x21 + x22; + final double pa1b1 = x11 / total; + final double pa1b2 = x12 / total; + final double pa2b1 = x21 / total; final double pa1 = pa1b1 + pa1b2; final double pb1 = pa1b1 + pa2b1; return ((pa1b1 - pa1*pb1) * (pa1b1 - pa1*pb1)) / ( pa1 * (1.0 - pa1) * pb1 * (1.0 - pb1) ); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 14ea17483..1130feaea 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -415,7 +415,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem likelihoodCalculationEngine.computeReadLikelihoods( haplotypes, perSampleReadList ); // subset down to only the best haplotypes to be genotyped in all samples ( in GGA mode use all discovered haplotypes ) - final ArrayList bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? likelihoodCalculationEngine.selectBestHaplotypes( haplotypes ) : haplotypes ); + final ArrayList bestHaplotypes = haplotypes;// ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? likelihoodCalculationEngine.selectBestHaplotypes( haplotypes ) : haplotypes ); for( final Pair>> callResult : ( GENOTYPE_FULL_ACTIVE_REGION && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES @@ -496,7 +496,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem //--------------------------------------------------------------------------------------------------------------- private void finalizeActiveRegion( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) { - if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getExtendedLoc() + " with " + activeRegion.size() + " reads:"); } + if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getLocation() + " with " + activeRegion.size() + " reads: (with overlap region = " + activeRegion.getExtendedLoc() + ")"); } final ArrayList finalizedReadList = new ArrayList(); final FragmentCollection fragmentCollection = FragmentUtils.create( ReadUtils.sortReadsByCoordinate(activeRegion.getReads()) ); activeRegion.clearReads(); @@ -525,7 +525,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem private List filterNonPassingReads( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) { final ArrayList readsToRemove = new ArrayList(); for( final GATKSAMRecord rec : activeRegion.getReads() ) { - if( rec.getReadLength() < 24 || rec.getMappingQuality() <= 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) { + if( rec.getReadLength() < 24 || rec.getMappingQuality() < 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) { readsToRemove.add(rec); } } From 7ed06ee7b977f9a51d644332995af7f1b624176c Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Mon, 30 Jul 2012 12:16:27 -0400 Subject: [PATCH 024/176] Updating FindCoveredIntervals to use the changes to the ActiveRegionWalker. --- .../walkers/diagnostics/targets/FindCoveredIntervals.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java index c964b0b4b..0c856c6df 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java @@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker; import org.broadinstitute.sting.gatk.walkers.PartitionBy; import org.broadinstitute.sting.gatk.walkers.PartitionType; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.io.PrintStream; @@ -47,13 +48,13 @@ public class FindCoveredIntervals extends ActiveRegionWalker { @Override // Look to see if the region has sufficient coverage - public double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) { + public ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) { int depth = ThresHolder.DEFAULTS.getFilteredCoverage(context.getBasePileup()); // note the linear probability scale int coverageThreshold = 20; - return Math.min((double) depth / coverageThreshold, 1); + return new ActivityProfileResult(Math.min((double) depth / coverageThreshold, 1)); } From c2b57ee4449616781f3b2af7b3051617bde0632f Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Mon, 30 Jul 2012 12:41:40 -0400 Subject: [PATCH 025/176] updating HC integration tests after these changes. --- .../haplotypecaller/HaplotypeCallerIntegrationTest.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 9b149e8d1..17ad37deb 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -20,17 +20,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "7b4e76934e0c911220b4e7da8776ab2b"); + HCTest(CEUTRIO_BAM, "", "eff4c820226abafcaa058c66585198a7"); } @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "fcf0cea98a571d5e2d1dfa8b5edc599d"); + HCTest(NA12878_BAM, "", "2b40b314e6e63ae165186b55b14eee41"); } @Test public void testHaplotypeCallerMultiSampleGGA() { - HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "d8317f4589e8e0c48bcd087cdb75ce88"); + HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "553870cc4d7e66f30862f8ae5dee01ff"); } private void HCTestComplexVariants(String bam, String args, String md5) { @@ -41,7 +41,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleComplex() { - HCTestComplexVariants(CEUTRIO_BAM, "", "6f9fda3ea82c5696bed1d48ee90cd76b"); + HCTestComplexVariants(CEUTRIO_BAM, "", "0936c41e8f006174f7cf27d97235133e"); } } From 3dabb90eb0502bcce352d3a34e99e0782e683d20 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Mon, 30 Jul 2012 21:26:16 -0400 Subject: [PATCH 026/176] Updating example active region walker integration test. --- .../sting/gatk/walkers/haplotypecaller/GenotypingEngine.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index 5fc466336..2787689b5 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -379,7 +379,6 @@ public class GenotypingEngine { } if( thisVC == null || nextVC == null ) { continue; - //throw new ReviewedStingException("StartPos TreeSet has an entry for an event that is found on no haplotype. start pos = " + thisStart + ", next pos = " + nextStart); } if( isBiallelic ) { final double R2 = calculateR2LD( Math.pow(10.0, x11), Math.pow(10.0, x12), Math.pow(10.0, x21), Math.pow(10.0, x22) ); @@ -441,7 +440,7 @@ public class GenotypingEngine { altBases = ArrayUtils.addAll(altBases, nextVC.getAlternateAllele(0).getBases()); int iii = 0; - if( refBases.length == altBases.length ) { // special case of insertion + deletion of same length creates an MNP --> trim padding bases off the allele + if( refBases.length == altBases.length ) { // insertion + deletion of same length creates an MNP --> trim common prefix bases off the beginning of the allele while( iii < refBases.length && refBases[iii] == altBases[iii] ) { iii++; } } final ArrayList mergedAlleles = new ArrayList(); From 874dbf5b58669dfe2d0b22aa954598ca89a77b41 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 26 Jul 2012 09:46:56 -0400 Subject: [PATCH 027/176] Maximum wait for GATK run report upload reduced to 10 seconds --- .../org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java index f190cbcfd..a42c73212 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java @@ -86,7 +86,7 @@ public class GATKRunReport { private static File REPORT_SENTINEL = new File(REPORT_DIR.getAbsolutePath() + "/ENABLE"); // number of milliseconds before the S3 put operation is timed-out: - private static final long S3PutTimeOut = 30 * 1000; + private static final long S3PutTimeOut = 10 * 1000; /** From 8db4e787b1f6ac57d15378893240e2ca203d369d Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 26 Jul 2012 13:56:06 -0400 Subject: [PATCH 028/176] V1 of tool to visualize the quality score information in the context covariates -- Upgraded jgrapht to latest version (0.8.3) --- ivy.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ivy.xml b/ivy.xml index b197d0714..6dd5be7a8 100644 --- a/ivy.xml +++ b/ivy.xml @@ -52,7 +52,7 @@ - + From 06703162882253b63a5a3b23cbc681ae8e5fa97d Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 26 Jul 2012 16:51:17 -0400 Subject: [PATCH 029/176] Be clearer that dcov 50 is good for 4x, should use 200 for >30x --- .../sting/gatk/walkers/genotyper/UnifiedGenotyper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index cd1815d82..6b279fd95 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -82,7 +82,7 @@ import java.util.*; * -o snps.raw.vcf \ * -stand_call_conf [50.0] \ * -stand_emit_conf 10.0 \ - * -dcov [50] \ + * -dcov [50 for 4x, 200 for >30x WGS or Whole exome] \ * [-L targets.interval_list] * * From 191294eedc9ddb7033f1e3fac304ef5b2bb6eee5 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 27 Jul 2012 09:39:16 -0400 Subject: [PATCH 030/176] Initial cleanup of RecalDatum for move and further refactoring -- Moved Datum, the now unnecessary superclass, into RecalDatum -- Fixed some obviously dangerous synchronization errors in RecalDatum, though these may not have caused problems because they may not have been called in parallel mode --- .../sting/gatk/walkers/bqsr/Datum.java | 109 ------------------ .../sting/gatk/walkers/bqsr/RecalDatum.java | 72 +++++++++--- 2 files changed, 54 insertions(+), 127 deletions(-) delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java deleted file mode 100644 index d7e8e16b5..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java +++ /dev/null @@ -1,109 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; - -import org.broadinstitute.sting.utils.QualityUtils; - -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Jan 6, 2010 - * - * An individual piece of recalibration data. Optimized for CountCovariates. Extras added to make TableRecalibration fast have been removed. - * Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates. - */ - -public class Datum { - - long numObservations; // number of bases seen in total - long numMismatches; // number of bases seen that didn't match the reference - - private static final int SMOOTHING_CONSTANT = 1; // used when calculating empirical qualities to avoid division by zero - - //--------------------------------------------------------------------------------------------------------------- - // - // constructors - // - //--------------------------------------------------------------------------------------------------------------- - - public Datum() { - numObservations = 0L; - numMismatches = 0L; - } - - public Datum(long numObservations, long numMismatches) { - this.numObservations = numObservations; - this.numMismatches = numMismatches; - } - - //--------------------------------------------------------------------------------------------------------------- - // - // increment methods - // - //--------------------------------------------------------------------------------------------------------------- - - synchronized void increment(final long incObservations, final long incMismatches) { - numObservations += incObservations; - numMismatches += incMismatches; - } - - synchronized void increment(final boolean isError) { - numObservations++; - numMismatches += isError ? 1:0; - } - - //--------------------------------------------------------------------------------------------------------------- - // - // methods to derive empirical quality score - // - //--------------------------------------------------------------------------------------------------------------- - - double empiricalQualDouble() { - final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT); - final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT); // smoothing is one error and one non-error observation, for example - final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations); - return Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE); - } - - byte empiricalQualByte() { - final double doubleMismatches = (double) (numMismatches); - final double doubleObservations = (double) (numObservations); - return QualityUtils.probToQual(1.0 - doubleMismatches / doubleObservations); // This is capped at Q40 - } - - @Override - public String toString() { - return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte()); - } - - @Override - public boolean equals(Object o) { - if (!(o instanceof Datum)) - return false; - Datum other = (Datum) o; - return numMismatches == other.numMismatches && numObservations == other.numObservations; - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java index 9b00b1876..ed4e769b1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java @@ -33,20 +33,40 @@ import org.broadinstitute.sting.utils.QualityUtils; import java.util.Random; /** + * An individual piece of recalibration data. Each bin counts up the number of observations and the number + * of reference mismatches seen for that combination of covariates. + * * Created by IntelliJ IDEA. * User: rpoplin * Date: Nov 3, 2009 - * - * An individual piece of recalibration data. Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates. */ - -public class RecalDatum extends Datum { - +public class RecalDatum { private static final double UNINITIALIZED = -1.0; - private double estimatedQReported; // estimated reported quality score based on combined data's individual q-reporteds and number of observations - private double empiricalQuality; // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example) + /** + * estimated reported quality score based on combined data's individual q-reporteds and number of observations + */ + private double estimatedQReported; + /** + * the empirical quality for datums that have been collapsed together (by read group and reported quality, for example) + */ + private double empiricalQuality; + + /** + * number of bases seen in total + */ + long numObservations; + + /** + * number of bases seen that didn't match the reference + */ + long numMismatches; + + /** + * used when calculating empirical qualities to avoid division by zero + */ + private static final int SMOOTHING_CONSTANT = 1; //--------------------------------------------------------------------------------------------------------------- // @@ -68,26 +88,24 @@ public class RecalDatum extends Datum { this.empiricalQuality = copy.empiricalQuality; } - public void combine(final RecalDatum other) { + public synchronized void combine(final RecalDatum other) { final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors(); increment(other.numObservations, other.numMismatches); estimatedQReported = -10 * Math.log10(sumErrors / this.numObservations); empiricalQuality = UNINITIALIZED; } - @Override - public void increment(final boolean isError) { - super.increment(isError); - empiricalQuality = UNINITIALIZED; - } - @Requires("empiricalQuality == UNINITIALIZED") @Ensures("empiricalQuality != UNINITIALIZED") - protected final void calcEmpiricalQuality() { - empiricalQuality = empiricalQualDouble(); // cache the value so we don't call log over and over again + private synchronized final void calcEmpiricalQuality() { + // cache the value so we don't call log over and over again + final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT); + final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT); + final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations); + empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE); } - public void setEstimatedQReported(final double estimatedQReported) { + public synchronized void setEstimatedQReported(final double estimatedQReported) { this.estimatedQReported = estimatedQReported; } @@ -95,7 +113,7 @@ public class RecalDatum extends Datum { return estimatedQReported; } - public void setEmpiricalQuality(final double empiricalQuality) { + public synchronized void setEmpiricalQuality(final double empiricalQuality) { this.empiricalQuality = empiricalQuality; } @@ -145,4 +163,22 @@ public class RecalDatum extends Datum { return super.equals(o) && MathUtils.compareDoubles(this.empiricalQuality, other.empiricalQuality, 0.001) == 0; } + + //--------------------------------------------------------------------------------------------------------------- + // + // increment methods + // + //--------------------------------------------------------------------------------------------------------------- + + synchronized void increment(final long incObservations, final long incMismatches) { + numObservations += incObservations; + numMismatches += incMismatches; + empiricalQuality = UNINITIALIZED; + } + + synchronized void increment(final boolean isError) { + numObservations++; + numMismatches += isError ? 1:0; + empiricalQuality = UNINITIALIZED; + } } From e00ed8bc5e2e2c9bf7376c4926fe1359de1a738c Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 27 Jul 2012 12:25:06 -0400 Subject: [PATCH 031/176] Cleanup BQSR classes -- Moved most of BQSR classes (which are used throughout the codebase) to utils.recalibration. It's better in my opinion to keep commonly used code in utils, and only specialized code in walkers. As code becomes embedded throughout GATK its should be refactored to live in utils -- Removed unncessary imports of BQSR in VQSR v3 -- Now ready to refactor QualQuantizer and unit test into a subclass of RecalDatum, refactor unit tests into RecalDatum unit tests, and generalize into hierarchical recal datum that can be used in QualQuantizer and the analysis of adaptive context covariate -- Update PluginManager to sort the plugins and interfaces. This allows us to have a deterministic order in which the plugin classes come back, which caused BQSR integration tests to temporarily change because I moved my classes around a bit. --- .../bqsr/AdvancedRecalibrationEngine.java | 4 + .../reducereads/SyntheticRead.java | 2 +- .../sting/gatk/walkers/bqsr/BQSRGatherer.java | 6 +- .../gatk/walkers/bqsr/BaseRecalibrator.java | 26 +- .../bqsr/RecalibrationArgumentCollection.java | 41 ++-- .../walkers/bqsr/RecalibrationEngine.java | 1 + .../bqsr/StandardRecalibrationEngine.java | 4 + .../utils/classloader/PluginManager.java | 22 ++ .../sting/utils/clipping/ClippingOp.java | 2 +- .../sting/utils/clipping/ReadClipper.java | 2 +- .../sting/utils/fragments/FragmentUtils.java | 2 +- .../recalibration/BaseRecalibration.java | 4 +- .../recalibration}/EventType.java | 2 +- .../recalibration}/QuantizationInfo.java | 20 +- .../recalibration}/ReadCovariates.java | 2 +- .../recalibration}/RecalDatum.java | 60 +++-- .../recalibration/RecalUtils.java} | 226 +++++++++--------- .../recalibration}/RecalibrationReport.java | 63 ++--- .../recalibration/RecalibrationTables.java | 4 +- .../covariates}/BinaryTagCovariate.java | 4 +- .../covariates}/ContextCovariate.java | 4 +- .../recalibration/covariates}/Covariate.java | 9 +- .../covariates}/CycleCovariate.java | 4 +- .../covariates/ExperimentalCovariate.java | 30 +++ .../covariates}/QualityScoreCovariate.java | 4 +- .../covariates}/ReadGroupCovariate.java | 4 +- .../covariates/RequiredCovariate.java | 30 +++ .../covariates/StandardCovariate.java | 30 +++ .../sting/utils/sam/GATKSAMRecord.java | 2 +- .../walkers/bqsr/BQSRGathererUnitTest.java | 13 +- .../ContextCovariateUnitTest.java | 5 +- .../CycleCovariateUnitTest.java | 4 +- .../ReadCovariatesUnitTest.java | 6 +- .../ReadGroupCovariateUnitTest.java | 4 +- .../RecalibrationReportUnitTest.java | 7 +- 35 files changed, 413 insertions(+), 240 deletions(-) rename public/java/src/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration}/EventType.java (96%) rename public/java/src/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration}/QuantizationInfo.java (78%) rename public/java/src/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration}/ReadCovariates.java (97%) rename public/java/src/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration}/RecalDatum.java (77%) rename public/java/src/org/broadinstitute/sting/{gatk/walkers/bqsr/RecalDataManager.java => utils/recalibration/RecalUtils.java} (96%) rename public/java/src/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration}/RecalibrationReport.java (82%) rename public/java/src/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration/covariates}/BinaryTagCovariate.java (89%) rename public/java/src/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration/covariates}/ContextCovariate.java (98%) rename public/java/src/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration/covariates}/Covariate.java (94%) rename public/java/src/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration/covariates}/CycleCovariate.java (97%) create mode 100644 public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ExperimentalCovariate.java rename public/java/src/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration/covariates}/QualityScoreCovariate.java (92%) rename public/java/src/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration/covariates}/ReadGroupCovariate.java (94%) create mode 100644 public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RequiredCovariate.java create mode 100644 public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/StandardCovariate.java rename public/java/test/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration}/ContextCovariateUnitTest.java (89%) rename public/java/test/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration}/CycleCovariateUnitTest.java (90%) rename public/java/test/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration}/ReadCovariatesUnitTest.java (92%) rename public/java/test/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration}/ReadGroupCovariateUnitTest.java (88%) rename public/java/test/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration}/RecalibrationReportUnitTest.java (95%) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java index 9eca81852..d714ca185 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java @@ -25,10 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; * OTHER DEALINGS IN THE SOFTWARE. */ +import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource; import org.broadinstitute.sting.utils.collections.NestedIntegerArray; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.recalibration.EventType; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.utils.recalibration.RecalDatum; import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine implements ProtectedPackageSource { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java index 9ee1a4634..3b9fc0390 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java @@ -5,7 +5,7 @@ import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMFileHeader; -import org.broadinstitute.sting.gatk.walkers.bqsr.EventType; +import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java index 122958ac2..a6d82d5b3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java @@ -28,6 +28,8 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broadinstitute.sting.commandline.Gatherer; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.recalibration.RecalUtils; +import org.broadinstitute.sting.utils.recalibration.RecalibrationReport; import java.io.File; import java.io.FileNotFoundException; @@ -71,11 +73,11 @@ public class BQSRGatherer extends Gatherer { if (RAC.recalibrationReport != null && !RAC.NO_PLOTS) { final File recal_out = new File(output.getName() + ".original"); final RecalibrationReport originalReport = new RecalibrationReport(RAC.recalibrationReport); - RecalDataManager.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES); + RecalUtils.generateRecalibrationPlot(recal_out, originalReport.getRecalibrationTables(), generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES); } else if (!RAC.NO_PLOTS) { final File recal_out = new File(output.getName() + ".recal"); - RecalDataManager.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES); + RecalUtils.generateRecalibrationPlot(recal_out, generalReport.getRecalibrationTables(), generalReport.getCovariates(), RAC.KEEP_INTERMEDIATE_FILES); } generalReport.output(outputFile); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index b2400a49d..e95a0e6c1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter; import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.GATKLiteUtils; import org.broadinstitute.sting.utils.collections.Pair; @@ -41,6 +42,9 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.recalibration.QuantizationInfo; +import org.broadinstitute.sting.utils.recalibration.RecalUtils; +import org.broadinstitute.sting.utils.recalibration.RecalibrationReport; import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -109,7 +113,7 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed @ArgumentCollection private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates - private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization + private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization private RecalibrationTables recalibrationTables; @@ -143,12 +147,12 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed throw new UserException.CommandLineException(NO_DBSNP_EXCEPTION); if (RAC.LIST_ONLY) { - RecalDataManager.listAvailableCovariates(logger); + RecalUtils.listAvailableCovariates(logger); System.exit(0); } RAC.recalibrationReport = getToolkit().getArguments().BQSR_RECAL_FILE; // if we have a recalibration file, record it so it goes on the report table - Pair, ArrayList> covariates = RecalDataManager.initializeCovariates(RAC); // initialize the required and optional covariates + Pair, ArrayList> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates ArrayList requiredCovariates = covariates.getFirst(); ArrayList optionalCovariates = covariates.getSecond(); @@ -222,17 +226,17 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed if (readNotSeen(read)) { read.setTemporaryAttribute(SEEN_ATTRIBUTE, true); - RecalDataManager.parsePlatformForRead(read, RAC); - if (RecalDataManager.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) { + RecalUtils.parsePlatformForRead(read, RAC); + if (RecalUtils.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) { read.setTemporaryAttribute(SKIP_RECORD_ATTRIBUTE, true); continue; } - read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalDataManager.computeCovariates(read, requestedCovariates)); + read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalUtils.computeCovariates(read, requestedCovariates)); } if (!ReadUtils.isSOLiDRead(read) || // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it - RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING || - RecalDataManager.isColorSpaceConsistent(read, offset)) + RAC.SOLID_RECAL_MODE == RecalUtils.SOLID_RECAL_MODE.DO_NOTHING || + RecalUtils.isColorSpaceConsistent(read, offset)) recalibrationEngine.updateDataForPileupElement(p, ref.getBase()); // This base finally passed all the checks for a good base, so add it to the big data hashmap } countedSites++; @@ -285,10 +289,10 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed File recalFile = getToolkit().getArguments().BQSR_RECAL_FILE; if (recalFile != null) { RecalibrationReport report = new RecalibrationReport(recalFile); - RecalDataManager.generateRecalibrationPlot(RAC.RECAL_FILE, report.getRecalibrationTables(), recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES); + RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, report.getRecalibrationTables(), recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES); } else - RecalDataManager.generateRecalibrationPlot(RAC.RECAL_FILE, recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES); + RecalUtils.generateRecalibrationPlot(RAC.RECAL_FILE, recalibrationTables, requestedCovariates, RAC.KEEP_INTERMEDIATE_FILES); } @@ -309,7 +313,7 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_FILE, "could not be created"); } - RecalDataManager.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, output); + RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, output); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java index 2a94426a7..f04e4a1b3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java @@ -29,6 +29,7 @@ import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.recalibration.RecalUtils; import java.io.File; import java.util.Collections; @@ -100,7 +101,7 @@ public class RecalibrationArgumentCollection { * reads which have had the reference inserted because of color space inconsistencies. */ @Argument(fullName = "solid_recal_mode", shortName = "sMode", required = false, doc = "How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS") - public RecalDataManager.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.SET_Q_ZERO; + public RecalUtils.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalUtils.SOLID_RECAL_MODE.SET_Q_ZERO; /** * CountCovariates and TableRecalibration accept a --solid_nocall_strategy flag which governs how the recalibrator handles @@ -108,7 +109,7 @@ public class RecalibrationArgumentCollection { * their color space tag can not be recalibrated. */ @Argument(fullName = "solid_nocall_strategy", shortName = "solid_nocall_strategy", doc = "Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ", required = false) - public RecalDataManager.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION; + public RecalUtils.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalUtils.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION; /** * The context covariate will use a context of this size to calculate it's covariate value for base mismatches @@ -177,41 +178,41 @@ public class RecalibrationArgumentCollection { public GATKReportTable generateReportTable() { GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2); argumentsTable.addColumn("Argument"); - argumentsTable.addColumn(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME); + argumentsTable.addColumn(RecalUtils.ARGUMENT_VALUE_COLUMN_NAME); argumentsTable.addRowID("covariate", true); - argumentsTable.set("covariate", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, (COVARIATES == null) ? "null" : Utils.join(",", COVARIATES)); + argumentsTable.set("covariate", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, (COVARIATES == null) ? "null" : Utils.join(",", COVARIATES)); argumentsTable.addRowID("no_standard_covs", true); - argumentsTable.set("no_standard_covs", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES); + argumentsTable.set("no_standard_covs", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES); argumentsTable.addRowID("run_without_dbsnp", true); - argumentsTable.set("run_without_dbsnp", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP); + argumentsTable.set("run_without_dbsnp", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP); argumentsTable.addRowID("solid_recal_mode", true); - argumentsTable.set("solid_recal_mode", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE); + argumentsTable.set("solid_recal_mode", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE); argumentsTable.addRowID("solid_nocall_strategy", true); - argumentsTable.set("solid_nocall_strategy", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY); + argumentsTable.set("solid_nocall_strategy", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY); argumentsTable.addRowID("mismatches_context_size", true); - argumentsTable.set("mismatches_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE); + argumentsTable.set("mismatches_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE); argumentsTable.addRowID("indels_context_size", true); - argumentsTable.set("indels_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE); + argumentsTable.set("indels_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE); argumentsTable.addRowID("mismatches_default_quality", true); - argumentsTable.set("mismatches_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY); + argumentsTable.set("mismatches_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY); argumentsTable.addRowID("insertions_default_quality", true); - argumentsTable.set("insertions_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY); + argumentsTable.set("insertions_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY); argumentsTable.addRowID("low_quality_tail", true); - argumentsTable.set("low_quality_tail", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL); + argumentsTable.set("low_quality_tail", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL); argumentsTable.addRowID("default_platform", true); - argumentsTable.set("default_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM); + argumentsTable.set("default_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM); argumentsTable.addRowID("force_platform", true); - argumentsTable.set("force_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM); + argumentsTable.set("force_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM); argumentsTable.addRowID("quantizing_levels", true); - argumentsTable.set("quantizing_levels", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS); + argumentsTable.set("quantizing_levels", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS); argumentsTable.addRowID("keep_intermediate_files", true); - argumentsTable.set("keep_intermediate_files", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES); + argumentsTable.set("keep_intermediate_files", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES); argumentsTable.addRowID("no_plots", true); - argumentsTable.set("no_plots", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS); + argumentsTable.set("no_plots", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS); argumentsTable.addRowID("recalibration_report", true); - argumentsTable.set("recalibration_report", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath()); + argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath()); argumentsTable.addRowID("binary_tag_name", true); - argumentsTable.set("binary_tag_name", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME); + argumentsTable.set("binary_tag_name", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME); return argumentsTable; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java index aa62a18bc..38e306939 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; +import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java index a24506d07..08c7da754 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java @@ -25,10 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; * OTHER DEALINGS IN THE SOFTWARE. */ +import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.classloader.PublicPackageSource; import org.broadinstitute.sting.utils.collections.NestedIntegerArray; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.recalibration.EventType; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.utils.recalibration.RecalDatum; import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java index f24bbb636..9a2cb68db 100644 --- a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java +++ b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java @@ -168,6 +168,28 @@ public class PluginManager { String pluginName = getName(pluginClass); pluginsByName.put(pluginName, pluginClass); } + + // sort the plugins so the order of elements is deterministic + sortPlugins(plugins); + sortPlugins(interfaces); + } + + /** + * Sorts, in place, the list of plugins according to getName() on each element + * + * @param unsortedPlugins + */ + private final void sortPlugins(final List> unsortedPlugins) { + Collections.sort(unsortedPlugins, new ComparePluginsByName()); + } + + private final class ComparePluginsByName implements Comparator> { + @Override + public int compare(final Class aClass, final Class aClass1) { + String pluginName1 = getName(aClass); + String pluginName2 = getName(aClass1); + return pluginName1.compareTo(pluginName2); + } } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java index a4383c3ae..554188bc1 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java @@ -4,7 +4,7 @@ import com.google.java.contract.Requires; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; -import org.broadinstitute.sting.gatk.walkers.bqsr.EventType; +import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java index ba9267222..6392ce4ce 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java @@ -3,7 +3,7 @@ package org.broadinstitute.sting.utils.clipping; import com.google.java.contract.Requires; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; -import org.broadinstitute.sting.gatk.walkers.bqsr.EventType; +import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; diff --git a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java index c6eec24f1..851272673 100644 --- a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java @@ -4,7 +4,7 @@ import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.walkers.bqsr.EventType; +import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.PileupElement; diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java index b5f7ad046..c09eb0063 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java @@ -27,7 +27,7 @@ package org.broadinstitute.sting.utils.recalibration; import net.sf.samtools.SAMTag; import net.sf.samtools.SAMUtils; -import org.broadinstitute.sting.gatk.walkers.bqsr.*; +import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.collections.NestedIntegerArray; @@ -103,7 +103,7 @@ public class BaseRecalibration { } } - RecalDataManager.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read + RecalUtils.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings if (disableIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) { read.setBaseQualities(null, errorModel); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/EventType.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/EventType.java similarity index 96% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/EventType.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/EventType.java index 2650f0f8d..1c84518eb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/EventType.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/EventType.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java similarity index 78% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java index fb3aef949..2b67d12a9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java @@ -1,11 +1,9 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.collections.NestedIntegerArray; -import org.broadinstitute.sting.utils.recalibration.QualQuantizer; -import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import java.util.Arrays; import java.util.List; @@ -41,7 +39,7 @@ public class QuantizationInfo { for (final RecalDatum value : qualTable.getAllValues()) { final RecalDatum datum = value; final int empiricalQual = MathUtils.fastRound(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL ) - qualHistogram[empiricalQual] += datum.numObservations; // add the number of observations for every key + qualHistogram[empiricalQual] += datum.getNumObservations(); // add the number of observations for every key } empiricalQualCounts = Arrays.asList(qualHistogram); // histogram with the number of observations of the empirical qualities quantizeQualityScores(quantizationLevels); @@ -70,15 +68,15 @@ public class QuantizationInfo { } public GATKReportTable generateReportTable() { - GATKReportTable quantizedTable = new GATKReportTable(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3); - quantizedTable.addColumn(RecalDataManager.QUALITY_SCORE_COLUMN_NAME); - quantizedTable.addColumn(RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME); - quantizedTable.addColumn(RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME); + GATKReportTable quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3); + quantizedTable.addColumn(RecalUtils.QUALITY_SCORE_COLUMN_NAME); + quantizedTable.addColumn(RecalUtils.QUANTIZED_COUNT_COLUMN_NAME); + quantizedTable.addColumn(RecalUtils.QUANTIZED_VALUE_COLUMN_NAME); for (int qual = 0; qual <= QualityUtils.MAX_QUAL_SCORE; qual++) { - quantizedTable.set(qual, RecalDataManager.QUALITY_SCORE_COLUMN_NAME, qual); - quantizedTable.set(qual, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME, empiricalQualCounts.get(qual)); - quantizedTable.set(qual, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME, quantizedQuals.get(qual)); + quantizedTable.set(qual, RecalUtils.QUALITY_SCORE_COLUMN_NAME, qual); + quantizedTable.set(qual, RecalUtils.QUANTIZED_COUNT_COLUMN_NAME, empiricalQualCounts.get(qual)); + quantizedTable.set(qual, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME, quantizedQuals.get(qual)); } return quantizedTable; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/ReadCovariates.java similarity index 97% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/ReadCovariates.java index 5e907237d..c86bd4deb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/ReadCovariates.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; /** * The object temporarily held by a read that describes all of it's covariates. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java similarity index 77% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java index ed4e769b1..43ff54378 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; /* * Copyright (c) 2009 The Broad Institute @@ -56,12 +56,12 @@ public class RecalDatum { /** * number of bases seen in total */ - long numObservations; + private long numObservations; /** * number of bases seen that didn't match the reference */ - long numMismatches; + private long numMismatches; /** * used when calculating empirical qualities to avoid division by zero @@ -82,16 +82,16 @@ public class RecalDatum { } public RecalDatum(final RecalDatum copy) { - this.numObservations = copy.numObservations; - this.numMismatches = copy.numMismatches; + this.numObservations = copy.getNumObservations(); + this.numMismatches = copy.getNumMismatches(); this.estimatedQReported = copy.estimatedQReported; this.empiricalQuality = copy.empiricalQuality; } public synchronized void combine(final RecalDatum other) { final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors(); - increment(other.numObservations, other.numMismatches); - estimatedQReported = -10 * Math.log10(sumErrors / this.numObservations); + increment(other.getNumObservations(), other.getNumMismatches()); + estimatedQReported = -10 * Math.log10(sumErrors / getNumObservations()); empiricalQuality = UNINITIALIZED; } @@ -100,7 +100,8 @@ public class RecalDatum { private synchronized final void calcEmpiricalQuality() { // cache the value so we don't call log over and over again final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT); - final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT); + // smoothing is one error and one non-error observation, for example + final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT); final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations); empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE); } @@ -125,7 +126,7 @@ public class RecalDatum { @Override public String toString() { - return String.format("%d,%d,%d", numObservations, numMismatches, (byte) Math.floor(getEmpiricalQuality())); + return String.format("%d,%d,%d", getNumObservations(), getNumMismatches(), (byte) Math.floor(getEmpiricalQuality())); } public String stringForCSV() { @@ -133,7 +134,7 @@ public class RecalDatum { } private double calcExpectedErrors() { - return (double) this.numObservations * qualToErrorProb(estimatedQReported); + return (double) getNumObservations() * qualToErrorProb(estimatedQReported); } private double qualToErrorProb(final double qual) { @@ -170,15 +171,42 @@ public class RecalDatum { // //--------------------------------------------------------------------------------------------------------------- - synchronized void increment(final long incObservations, final long incMismatches) { - numObservations += incObservations; - numMismatches += incMismatches; + public long getNumObservations() { + return numObservations; + } + + public synchronized void setNumObservations(final long numObservations) { + this.numObservations = numObservations; empiricalQuality = UNINITIALIZED; } - synchronized void increment(final boolean isError) { - numObservations++; - numMismatches += isError ? 1:0; + public long getNumMismatches() { + return numMismatches; + } + + public synchronized void setNumMismatches(final long numMismatches) { + this.numMismatches = numMismatches; empiricalQuality = UNINITIALIZED; } + + public synchronized void incrementNumObservations(final long by) { + numObservations += by; + empiricalQuality = UNINITIALIZED; + } + + public synchronized void incrementNumMismatches(final long by) { + numMismatches += by; + empiricalQuality = UNINITIALIZED; + } + + public synchronized void increment(final long incObservations, final long incMismatches) { + incrementNumObservations(incObservations); + incrementNumMismatches(incMismatches); + } + + public synchronized void increment(final boolean isError) { + incrementNumObservations(1); + if ( isError ) + incrementNumMismatches(1); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java similarity index 96% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java index f40a62d53..fe6ef7018 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java @@ -23,11 +23,13 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; +import org.broadinstitute.sting.utils.recalibration.covariates.*; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.R.RScriptExecutor; import org.broadinstitute.sting.utils.Utils; @@ -39,7 +41,6 @@ import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.io.Resource; -import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -59,7 +60,7 @@ import java.util.*; * This class holds the parsing methods that are shared between CountCovariates and TableRecalibration. */ -public class RecalDataManager { +public class RecalUtils { public final static String ARGUMENT_REPORT_TABLE_TITLE = "Arguments"; public final static String QUANTIZED_REPORT_TABLE_TITLE = "Quantized"; public final static String READGROUP_REPORT_TABLE_TITLE = "RecalTable0"; @@ -85,13 +86,108 @@ public class RecalDataManager { private static final String SCRIPT_FILE = "BQSR.R"; - private static final Pair covariateValue = new Pair(RecalDataManager.COVARIATE_VALUE_COLUMN_NAME, "%s"); - private static final Pair covariateName = new Pair(RecalDataManager.COVARIATE_NAME_COLUMN_NAME, "%s"); - private static final Pair eventType = new Pair(RecalDataManager.EVENT_TYPE_COLUMN_NAME, "%s"); - private static final Pair empiricalQuality = new Pair(RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME, "%.4f"); - private static final Pair estimatedQReported = new Pair(RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME, "%.4f"); - private static final Pair nObservations = new Pair(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d"); - private static final Pair nErrors = new Pair(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME, "%d"); + private static final Pair covariateValue = new Pair(RecalUtils.COVARIATE_VALUE_COLUMN_NAME, "%s"); + private static final Pair covariateName = new Pair(RecalUtils.COVARIATE_NAME_COLUMN_NAME, "%s"); + private static final Pair eventType = new Pair(RecalUtils.EVENT_TYPE_COLUMN_NAME, "%s"); + private static final Pair empiricalQuality = new Pair(RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME, "%.4f"); + private static final Pair estimatedQReported = new Pair(RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME, "%.4f"); + private static final Pair nObservations = new Pair(RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d"); + private static final Pair nErrors = new Pair(RecalUtils.NUMBER_ERRORS_COLUMN_NAME, "%d"); + + /** + * Generates two lists : required covariates and optional covariates based on the user's requests. + * + * Performs the following tasks in order: + * 1. Adds all requierd covariates in order + * 2. Check if the user asked to use the standard covariates and adds them all if that's the case + * 3. Adds all covariates requested by the user that were not already added by the two previous steps + * + * @param argumentCollection the argument collection object for the recalibration walker + * @return a pair of ordered lists : required covariates (first) and optional covariates (second) + */ + public static Pair, ArrayList> initializeCovariates(RecalibrationArgumentCollection argumentCollection) { + final List> covariateClasses = new PluginManager(Covariate.class).getPlugins(); + final List> requiredClasses = new PluginManager(RequiredCovariate.class).getPlugins(); + final List> standardClasses = new PluginManager(StandardCovariate.class).getPlugins(); + + final ArrayList requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates + ArrayList optionalCovariates = new ArrayList(); + if (!argumentCollection.DO_NOT_USE_STANDARD_COVARIATES) + optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user + + if (argumentCollection.COVARIATES != null) { // parse the -cov arguments that were provided, skipping over the ones already specified + for (String requestedCovariateString : argumentCollection.COVARIATES) { + boolean foundClass = false; + for (Class covClass : covariateClasses) { + if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class + foundClass = true; + if (!requiredClasses.contains(covClass) && + (argumentCollection.DO_NOT_USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) { + try { + final Covariate covariate = covClass.newInstance(); // now that we've found a matching class, try to instantiate it + optionalCovariates.add(covariate); + } catch (Exception e) { + throw new DynamicClassResolutionException(covClass, e); + } + } + } + } + + if (!foundClass) { + throw new UserException.CommandLineException("The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates."); + } + } + } + return new Pair, ArrayList>(requiredCovariates, optionalCovariates); + } + + /** + * Adds the required covariates to a covariate list + * + * Note: this method really only checks if the classes object has the expected number of required covariates, then add them by hand. + * + * @param classes list of classes to add to the covariate list + * @return the covariate list + */ + private static ArrayList addRequiredCovariatesToList(List> classes) { + ArrayList dest = new ArrayList(classes.size()); + if (classes.size() != 2) + throw new ReviewedStingException("The number of required covariates has changed, this is a hard change in the code and needs to be inspected"); + + dest.add(new ReadGroupCovariate()); // enforce the order with RG first and QS next. + dest.add(new QualityScoreCovariate()); + return dest; + } + + /** + * Adds the standard covariates to a covariate list + * + * @param classes list of classes to add to the covariate list + * @return the covariate list + */ + private static ArrayList addStandardCovariatesToList(List> classes) { + ArrayList dest = new ArrayList(classes.size()); + for (Class covClass : classes) { + try { + final Covariate covariate = (Covariate) covClass.newInstance(); + dest.add(covariate); + } catch (Exception e) { + throw new DynamicClassResolutionException(covClass, e); + } + } + return dest; + } + + public static void listAvailableCovariates(Logger logger) { + // Get a list of all available covariates + final List> covariateClasses = new PluginManager(Covariate.class).getPlugins(); + + // Print and exit if that's what was requested + logger.info("Available covariates:"); + for (Class covClass : covariateClasses) + logger.info(covClass.getSimpleName()); + logger.info(""); + } public enum SOLID_RECAL_MODE { @@ -152,64 +248,6 @@ public class RecalDataManager { } } - /** - * Generates two lists : required covariates and optional covariates based on the user's requests. - * - * Performs the following tasks in order: - * 1. Adds all requierd covariates in order - * 2. Check if the user asked to use the standard covariates and adds them all if that's the case - * 3. Adds all covariates requested by the user that were not already added by the two previous steps - * - * @param argumentCollection the argument collection object for the recalibration walker - * @return a pair of ordered lists : required covariates (first) and optional covariates (second) - */ - public static Pair, ArrayList> initializeCovariates(RecalibrationArgumentCollection argumentCollection) { - final List> covariateClasses = new PluginManager(Covariate.class).getPlugins(); - final List> requiredClasses = new PluginManager(RequiredCovariate.class).getPlugins(); - final List> standardClasses = new PluginManager(StandardCovariate.class).getPlugins(); - - final ArrayList requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates - ArrayList optionalCovariates = new ArrayList(); - if (!argumentCollection.DO_NOT_USE_STANDARD_COVARIATES) - optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user - - if (argumentCollection.COVARIATES != null) { // parse the -cov arguments that were provided, skipping over the ones already specified - for (String requestedCovariateString : argumentCollection.COVARIATES) { - boolean foundClass = false; - for (Class covClass : covariateClasses) { - if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class - foundClass = true; - if (!requiredClasses.contains(covClass) && - (argumentCollection.DO_NOT_USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) { - try { - final Covariate covariate = covClass.newInstance(); // now that we've found a matching class, try to instantiate it - optionalCovariates.add(covariate); - } catch (Exception e) { - throw new DynamicClassResolutionException(covClass, e); - } - } - } - } - - if (!foundClass) { - throw new UserException.CommandLineException("The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates."); - } - } - } - return new Pair, ArrayList>(requiredCovariates, optionalCovariates); - } - - public static void listAvailableCovariates(Logger logger) { - // Get a list of all available covariates - final List> covariateClasses = new PluginManager(Covariate.class).getPlugins(); - - // Print and exit if that's what was requested - logger.info("Available covariates:"); - for (Class covClass : covariateClasses) - logger.info(covClass.getSimpleName()); - logger.info(""); - } - private static List generateReportTables(final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates) { List result = new LinkedList(); int reportTableIndex = 0; @@ -272,8 +310,8 @@ public class RecalDataManager { reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEmpiricalQuality()); if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index) reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table - reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.numObservations); - reportTable.set(rowIndex, columnNames.get(columnIndex).getFirst(), datum.numMismatches); + reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getNumObservations()); + reportTable.set(rowIndex, columnNames.get(columnIndex).getFirst(), datum.getNumMismatches()); rowIndex++; } @@ -320,7 +358,7 @@ public class RecalDataManager { files.getFirst().close(); final RScriptExecutor executor = new RScriptExecutor(); - executor.addScript(new Resource(SCRIPT_FILE, RecalDataManager.class)); + executor.addScript(new Resource(SCRIPT_FILE, RecalUtils.class)); executor.addArgs(csvFileName.getAbsolutePath()); executor.addArgs(plotFileName.getAbsolutePath()); executor.exec(); @@ -480,14 +518,14 @@ public class RecalDataManager { */ public static boolean isColorSpaceConsistent(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) { if (ReadUtils.isSOLiDRead(read)) { // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base - if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read - final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG); + if (read.getAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read + final Object attr = read.getAttribute(RecalUtils.COLOR_SPACE_ATTRIBUTE_TAG); if (attr != null) { byte[] colorSpace; if (attr instanceof String) colorSpace = ((String) attr).getBytes(); else - throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName())); + throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalUtils.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName())); byte[] readBases = read.getReadBases(); // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read if (read.getReadNegativeStrandFlag()) @@ -501,7 +539,7 @@ public class RecalDataManager { inconsistency[i] = (byte) (thisBase == readBases[i] ? 0 : 1); prevBase = readBases[i]; } - read.setAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency); + read.setAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency); } else if (strategy == SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) // if the strategy calls for an exception, throw it throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias."); @@ -545,7 +583,7 @@ public class RecalDataManager { * @return Returns true if the base was inconsistent with the color space */ public static boolean isColorSpaceConsistent(final GATKSAMRecord read, final int offset) { - final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG); + final Object attr = read.getAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG); if (attr != null) { final byte[] inconsistency = (byte[]) attr; // NOTE: The inconsistency array is in the direction of the read, not aligned to the reference! @@ -691,40 +729,4 @@ public class RecalDataManager { } - /** - * Adds the required covariates to a covariate list - * - * Note: this method really only checks if the classes object has the expected number of required covariates, then add them by hand. - * - * @param classes list of classes to add to the covariate list - * @return the covariate list - */ - private static ArrayList addRequiredCovariatesToList(List> classes) { - ArrayList dest = new ArrayList(classes.size()); - if (classes.size() != 2) - throw new ReviewedStingException("The number of required covariates has changed, this is a hard change in the code and needs to be inspected"); - - dest.add(new ReadGroupCovariate()); // enforce the order with RG first and QS next. - dest.add(new QualityScoreCovariate()); - return dest; - } - - /** - * Adds the standard covariates to a covariate list - * - * @param classes list of classes to add to the covariate list - * @return the covariate list - */ - private static ArrayList addStandardCovariatesToList(List> classes) { - ArrayList dest = new ArrayList(classes.size()); - for (Class covClass : classes) { - try { - final Covariate covariate = (Covariate) covClass.newInstance(); - dest.add(covariate); - } catch (Exception e) { - throw new DynamicClassResolutionException(covClass, e); - } - } - return dest; - } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java similarity index 82% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java index e69cf4d69..e6ab9e38b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java @@ -1,11 +1,12 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.gatk.walkers.bqsr.*; +import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.collections.NestedIntegerArray; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import java.io.File; import java.io.PrintStream; @@ -33,13 +34,13 @@ public class RecalibrationReport { public RecalibrationReport(final File RECAL_FILE) { final GATKReport report = new GATKReport(RECAL_FILE); - argumentTable = report.getTable(RecalDataManager.ARGUMENT_REPORT_TABLE_TITLE); + argumentTable = report.getTable(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE); RAC = initializeArgumentCollectionTable(argumentTable); - GATKReportTable quantizedTable = report.getTable(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE); + GATKReportTable quantizedTable = report.getTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE); quantizationInfo = initializeQuantizationTable(quantizedTable); - Pair, ArrayList> covariates = RecalDataManager.initializeCovariates(RAC); // initialize the required and optional covariates + Pair, ArrayList> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates ArrayList requiredCovariates = covariates.getFirst(); ArrayList optionalCovariates = covariates.getSecond(); requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()]; @@ -57,13 +58,13 @@ public class RecalibrationReport { for (Covariate cov : requestedCovariates) cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection - recalibrationTables = new RecalibrationTables(requestedCovariates, countReadGroups(report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE))); + recalibrationTables = new RecalibrationTables(requestedCovariates, countReadGroups(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE))); - parseReadGroupTable(report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE)); + parseReadGroupTable(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE)); - parseQualityScoreTable(report.getTable(RecalDataManager.QUALITY_SCORE_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE)); + parseQualityScoreTable(report.getTable(RecalUtils.QUALITY_SCORE_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE)); - parseAllCovariatesTable(report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE), recalibrationTables); + parseAllCovariatesTable(report.getTable(RecalUtils.ALL_COVARIATES_REPORT_TABLE_TITLE), recalibrationTables); } @@ -85,7 +86,7 @@ public class RecalibrationReport { private int countReadGroups(final GATKReportTable reportTable) { Set readGroups = new HashSet(); for ( int i = 0; i < reportTable.getNumRows(); i++ ) - readGroups.add(reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME).toString()); + readGroups.add(reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME).toString()); return readGroups.size(); } @@ -139,17 +140,17 @@ public class RecalibrationReport { \ */ private void parseAllCovariatesTable(final GATKReportTable reportTable, final RecalibrationTables recalibrationTables) { for ( int i = 0; i < reportTable.getNumRows(); i++ ) { - final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME); + final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME); tempCOVarray[0] = requestedCovariates[0].keyFromValue(rg); - final Object qual = reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME); + final Object qual = reportTable.get(i, RecalUtils.QUALITY_SCORE_COLUMN_NAME); tempCOVarray[1] = requestedCovariates[1].keyFromValue(qual); - final String covName = (String)reportTable.get(i, RecalDataManager.COVARIATE_NAME_COLUMN_NAME); + final String covName = (String)reportTable.get(i, RecalUtils.COVARIATE_NAME_COLUMN_NAME); final int covIndex = optionalCovariateIndexes.get(covName); - final Object covValue = reportTable.get(i, RecalDataManager.COVARIATE_VALUE_COLUMN_NAME); + final Object covValue = reportTable.get(i, RecalUtils.COVARIATE_VALUE_COLUMN_NAME); tempCOVarray[2] = requestedCovariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex].keyFromValue(covValue); - final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME)); + final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME)); tempCOVarray[3] = event.index; recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex).put(getRecalDatum(reportTable, i, false), tempCOVarray); @@ -164,11 +165,11 @@ public class RecalibrationReport { */ private void parseQualityScoreTable(final GATKReportTable reportTable, final NestedIntegerArray qualTable) { for ( int i = 0; i < reportTable.getNumRows(); i++ ) { - final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME); + final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME); tempQUALarray[0] = requestedCovariates[0].keyFromValue(rg); - final Object qual = reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME); + final Object qual = reportTable.get(i, RecalUtils.QUALITY_SCORE_COLUMN_NAME); tempQUALarray[1] = requestedCovariates[1].keyFromValue(qual); - final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME)); + final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME)); tempQUALarray[2] = event.index; qualTable.put(getRecalDatum(reportTable, i, false), tempQUALarray); @@ -183,9 +184,9 @@ public class RecalibrationReport { */ private void parseReadGroupTable(final GATKReportTable reportTable, final NestedIntegerArray rgTable) { for ( int i = 0; i < reportTable.getNumRows(); i++ ) { - final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME); + final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME); tempRGarray[0] = requestedCovariates[0].keyFromValue(rg); - final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME)); + final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME)); tempRGarray[1] = event.index; rgTable.put(getRecalDatum(reportTable, i, true), tempRGarray); @@ -193,13 +194,13 @@ public class RecalibrationReport { } private RecalDatum getRecalDatum(final GATKReportTable reportTable, final int row, final boolean hasEstimatedQReportedColumn) { - final long nObservations = (Long) reportTable.get(row, RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME); - final long nErrors = (Long) reportTable.get(row, RecalDataManager.NUMBER_ERRORS_COLUMN_NAME); - final double empiricalQuality = (Double) reportTable.get(row, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME); + final long nObservations = (Long) reportTable.get(row, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME); + final long nErrors = (Long) reportTable.get(row, RecalUtils.NUMBER_ERRORS_COLUMN_NAME); + final double empiricalQuality = (Double) reportTable.get(row, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME); final double estimatedQReported = hasEstimatedQReportedColumn ? // the estimatedQreported column only exists in the ReadGroup table - (Double) reportTable.get(row, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table - Byte.parseByte((String) reportTable.get(row, RecalDataManager.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table + (Double) reportTable.get(row, RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table + Byte.parseByte((String) reportTable.get(row, RecalUtils.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table final RecalDatum datum = new RecalDatum(nObservations, nErrors, (byte)1); datum.setEstimatedQReported(estimatedQReported); @@ -218,8 +219,8 @@ public class RecalibrationReport { final Long[] counts = new Long[QualityUtils.MAX_QUAL_SCORE + 1]; for ( int i = 0; i < table.getNumRows(); i++ ) { final byte originalQual = (byte)i; - final Object quantizedObject = table.get(i, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME); - final Object countObject = table.get(i, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME); + final Object quantizedObject = table.get(i, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME); + final Object countObject = table.get(i, RecalUtils.QUANTIZED_COUNT_COLUMN_NAME); final byte quantizedQual = Byte.parseByte(quantizedObject.toString()); final long quantizedCount = Long.parseLong(countObject.toString()); quals[originalQual] = quantizedQual; @@ -239,7 +240,7 @@ public class RecalibrationReport { for ( int i = 0; i < table.getNumRows(); i++ ) { final String argument = table.get(i, "Argument").toString(); - Object value = table.get(i, RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME); + Object value = table.get(i, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME); if (value.equals("null")) value = null; // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport @@ -250,10 +251,10 @@ public class RecalibrationReport { RAC.DO_NOT_USE_STANDARD_COVARIATES = Boolean.parseBoolean((String) value); else if (argument.equals("solid_recal_mode")) - RAC.SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.recalModeFromString((String) value); + RAC.SOLID_RECAL_MODE = RecalUtils.SOLID_RECAL_MODE.recalModeFromString((String) value); else if (argument.equals("solid_nocall_strategy")) - RAC.SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.nocallStrategyFromString((String) value); + RAC.SOLID_NOCALL_STRATEGY = RecalUtils.SOLID_NOCALL_STRATEGY.nocallStrategyFromString((String) value); else if (argument.equals("mismatches_context_size")) RAC.MISMATCHES_CONTEXT_SIZE = Integer.parseInt((String) value); @@ -307,7 +308,7 @@ public class RecalibrationReport { } public void output(PrintStream output) { - RecalDataManager.outputRecalibrationReport(argumentTable, quantizationInfo, recalibrationTables, requestedCovariates, output); + RecalUtils.outputRecalibrationReport(argumentTable, quantizationInfo, recalibrationTables, requestedCovariates, output); } public RecalibrationArgumentCollection getRAC() { diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java index 0416b5eb9..f37e69c9a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java @@ -25,9 +25,7 @@ package org.broadinstitute.sting.utils.recalibration; -import org.broadinstitute.sting.gatk.walkers.bqsr.Covariate; -import org.broadinstitute.sting.gatk.walkers.bqsr.EventType; -import org.broadinstitute.sting.gatk.walkers.bqsr.RecalDatum; +import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.collections.NestedIntegerArray; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BinaryTagCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/BinaryTagCovariate.java similarity index 89% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BinaryTagCovariate.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/BinaryTagCovariate.java index a89586c2c..cebdebf9d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BinaryTagCovariate.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/BinaryTagCovariate.java @@ -1,5 +1,7 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration.covariates; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java similarity index 98% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java index 5fe8809fb..4c20284d9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java @@ -23,8 +23,10 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration.covariates; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.clipping.ClippingRepresentation; import org.broadinstitute.sting.utils.clipping.ReadClipper; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/Covariate.java similarity index 94% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/Covariate.java index 1ad5346fa..c613135bb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/Covariate.java @@ -1,5 +1,7 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration.covariates; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; /* @@ -89,8 +91,3 @@ public interface Covariate { public int maximumKeyValue(); } -interface RequiredCovariate extends Covariate {} - -interface StandardCovariate extends Covariate {} - -interface ExperimentalCovariate extends Covariate {} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java similarity index 97% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java index f0ff8f2bd..4f15419c7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java @@ -1,5 +1,7 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration.covariates; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.NGSPlatform; import org.broadinstitute.sting.utils.exceptions.UserException; diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ExperimentalCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ExperimentalCovariate.java new file mode 100644 index 000000000..72df2a410 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ExperimentalCovariate.java @@ -0,0 +1,30 @@ +package org.broadinstitute.sting.utils.recalibration.covariates; + +/** + * [Short one sentence description of this walker] + *

+ *

+ * [Functionality of this walker] + *

+ *

+ *

Input

+ *

+ * [Input description] + *

+ *

+ *

Output

+ *

+ * [Output description] + *

+ *

+ *

Examples

+ *
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T $WalkerName
+ *  
+ * + * @author Your Name + * @since Date created + */ +public interface ExperimentalCovariate extends Covariate {} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/QualityScoreCovariate.java similarity index 92% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/QualityScoreCovariate.java index dd7060ff8..3ef8ee931 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/QualityScoreCovariate.java @@ -1,5 +1,7 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration.covariates; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ReadGroupCovariate.java similarity index 94% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java rename to public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ReadGroupCovariate.java index f04d27b7a..85568dac9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ReadGroupCovariate.java @@ -1,5 +1,7 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration.covariates; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RequiredCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RequiredCovariate.java new file mode 100644 index 000000000..50755dbcf --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RequiredCovariate.java @@ -0,0 +1,30 @@ +package org.broadinstitute.sting.utils.recalibration.covariates; + +/** + * [Short one sentence description of this walker] + *

+ *

+ * [Functionality of this walker] + *

+ *

+ *

Input

+ *

+ * [Input description] + *

+ *

+ *

Output

+ *

+ * [Output description] + *

+ *

+ *

Examples

+ *
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T $WalkerName
+ *  
+ * + * @author Your Name + * @since Date created + */ +public interface RequiredCovariate extends Covariate {} diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/StandardCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/StandardCovariate.java new file mode 100644 index 000000000..444954f25 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/StandardCovariate.java @@ -0,0 +1,30 @@ +package org.broadinstitute.sting.utils.recalibration.covariates; + +/** + * [Short one sentence description of this walker] + *

+ *

+ * [Functionality of this walker] + *

+ *

+ *

Input

+ *

+ * [Input description] + *

+ *

+ *

Output

+ *

+ * [Output description] + *

+ *

+ *

Examples

+ *
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T $WalkerName
+ *  
+ * + * @author Your Name + * @since Date created + */ +public interface StandardCovariate extends Covariate {} diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index 659615cf4..c9b3a2df8 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -25,7 +25,7 @@ package org.broadinstitute.sting.utils.sam; import net.sf.samtools.*; -import org.broadinstitute.sting.gatk.walkers.bqsr.EventType; +import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.NGSPlatform; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java index 8e9f2533f..f1ffbe80f 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.utils.recalibration.RecalUtils; import org.testng.Assert; import org.testng.annotations.Test; @@ -33,15 +34,15 @@ public class BQSRGathererUnitTest { for (GATKReportTable originalTable : originalReport.getTables()) { GATKReportTable calculatedTable = calculatedReport.getTable(originalTable.getTableName()); List columnsToTest = new LinkedList(); - columnsToTest.add(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME); - columnsToTest.add(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME); - if (originalTable.getTableName().equals(RecalDataManager.ARGUMENT_REPORT_TABLE_TITLE)) { // these tables must be IDENTICAL - columnsToTest.add(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME); + columnsToTest.add(RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME); + columnsToTest.add(RecalUtils.NUMBER_ERRORS_COLUMN_NAME); + if (originalTable.getTableName().equals(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE)) { // these tables must be IDENTICAL + columnsToTest.add(RecalUtils.ARGUMENT_VALUE_COLUMN_NAME); testTablesWithColumnsAndFactor(originalTable, calculatedTable, columnsToTest, 1); } - else if (originalTable.getTableName().equals(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE)) { - columnsToTest.add(RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME); + else if (originalTable.getTableName().equals(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE)) { + columnsToTest.add(RecalUtils.QUANTIZED_COUNT_COLUMN_NAME); testTablesWithColumnsAndFactor(originalTable, calculatedTable, columnsToTest, 2); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/ContextCovariateUnitTest.java similarity index 89% rename from public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java rename to public/java/test/org/broadinstitute/sting/utils/recalibration/ContextCovariateUnitTest.java index 553b7e237..2556448ad 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/ContextCovariateUnitTest.java @@ -1,5 +1,8 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; +import org.broadinstitute.sting.utils.recalibration.covariates.ContextCovariate; +import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.clipping.ClippingRepresentation; import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java similarity index 90% rename from public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java rename to public/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java index 3fa1e916d..c3d93b2cb 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java @@ -1,5 +1,7 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; +import org.broadinstitute.sting.utils.recalibration.covariates.CycleCovariate; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariatesUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/ReadCovariatesUnitTest.java similarity index 92% rename from public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariatesUnitTest.java rename to public/java/test/org/broadinstitute/sting/utils/recalibration/ReadCovariatesUnitTest.java index 37994cf12..dac26cb53 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariatesUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/ReadCovariatesUnitTest.java @@ -1,5 +1,7 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; +import org.broadinstitute.sting.utils.recalibration.covariates.*; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -41,7 +43,7 @@ public class ReadCovariatesUnitTest { requestedCovariates[2] = coCov; requestedCovariates[3] = cyCov; - ReadCovariates rc = RecalDataManager.computeCovariates(read, requestedCovariates); + ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates); // check that the length is correct Assert.assertEquals(rc.getMismatchesKeySet().length, length); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/ReadGroupCovariateUnitTest.java similarity index 88% rename from public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariateUnitTest.java rename to public/java/test/org/broadinstitute/sting/utils/recalibration/ReadGroupCovariateUnitTest.java index a83508353..78a74d259 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/ReadGroupCovariateUnitTest.java @@ -1,5 +1,7 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; +import org.broadinstitute.sting.utils.recalibration.covariates.ReadGroupCovariate; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReportUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java similarity index 95% rename from public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReportUnitTest.java rename to public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java index e4a77c016..387cc94d6 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReportUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java @@ -1,9 +1,10 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; +package org.broadinstitute.sting.utils.recalibration; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; +import org.broadinstitute.sting.utils.recalibration.covariates.*; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.collections.NestedIntegerArray; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -72,7 +73,7 @@ public class RecalibrationReportUnitTest { final int expectedKeys = expectedNumberOfKeys(4, length, RAC.INDELS_CONTEXT_SIZE, RAC.MISMATCHES_CONTEXT_SIZE); int nKeys = 0; // keep track of how many keys were produced - final ReadCovariates rc = RecalDataManager.computeCovariates(read, requestedCovariates); + final ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates); final RecalibrationTables recalibrationTables = new RecalibrationTables(requestedCovariates); final NestedIntegerArray rgTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE); From 57b45bfb1e284abd335ad9965776b6228fd2e504 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 27 Jul 2012 14:34:13 -0400 Subject: [PATCH 032/176] Extensive unit tests, contacts, and documentation for RecalDatum --- .../sting/utils/QualityUtils.java | 5 + .../sting/utils/recalibration/RecalDatum.java | 102 ++++++++++++++---- .../org/broadinstitute/sting/BaseTest.java | 10 +- 3 files changed, 93 insertions(+), 24 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java index 4acc0e2c3..1242e5b00 100755 --- a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java @@ -69,9 +69,14 @@ public class QualityUtils { * @return a probability (0.0 - 1.0) */ static private double qualToErrorProbRaw(int qual) { + return qualToErrorProb((double) qual); + } + + public static double qualToErrorProb(final double qual) { return Math.pow(10.0, ((double) qual)/-10.0); } + static public double qualToErrorProb(byte qual) { return qualToErrorProbCache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc. } diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java index 43ff54378..6ee5df68b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.utils.recalibration; */ import com.google.java.contract.Ensures; +import com.google.java.contract.Invariant; import com.google.java.contract.Requires; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; @@ -40,6 +41,17 @@ import java.util.Random; * User: rpoplin * Date: Nov 3, 2009 */ +@Invariant({ + "estimatedQReported >= 0.0", + "! Double.isNaN(estimatedQReported)", + "! Double.isInfinite(estimatedQReported)", + "empiricalQuality >= 0.0 || empiricalQuality == UNINITIALIZED", + "! Double.isNaN(empiricalQuality)", + "! Double.isInfinite(empiricalQuality)", + "numObservations >= 0", + "numMismatches >= 0", + "numMismatches <= numObservations" +}) public class RecalDatum { private static final double UNINITIALIZED = -1.0; @@ -74,13 +86,28 @@ public class RecalDatum { // //--------------------------------------------------------------------------------------------------------------- + /** + * Create a new RecalDatum with given observation and mismatch counts, and an reported quality + * + * @param _numObservations + * @param _numMismatches + * @param reportedQuality + */ public RecalDatum(final long _numObservations, final long _numMismatches, final byte reportedQuality) { + if ( numObservations < 0 ) throw new IllegalArgumentException("numObservations < 0"); + if ( numMismatches < 0 ) throw new IllegalArgumentException("numMismatches < 0"); + if ( reportedQuality < 0 ) throw new IllegalArgumentException("reportedQuality < 0"); + numObservations = _numObservations; numMismatches = _numMismatches; estimatedQReported = reportedQuality; empiricalQuality = UNINITIALIZED; } + /** + * Copy copy into this recal datum, overwriting all of this objects data + * @param copy + */ public RecalDatum(final RecalDatum copy) { this.numObservations = copy.getNumObservations(); this.numMismatches = copy.getNumMismatches(); @@ -88,6 +115,12 @@ public class RecalDatum { this.empiricalQuality = copy.empiricalQuality; } + /** + * Add in all of the data from other into this object, updating the reported quality from the expected + * error rate implied by the two reported qualities + * + * @param other + */ public synchronized void combine(final RecalDatum other) { final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors(); increment(other.getNumObservations(), other.getNumMismatches()); @@ -95,26 +128,26 @@ public class RecalDatum { empiricalQuality = UNINITIALIZED; } - @Requires("empiricalQuality == UNINITIALIZED") - @Ensures("empiricalQuality != UNINITIALIZED") - private synchronized final void calcEmpiricalQuality() { - // cache the value so we don't call log over and over again - final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT); - // smoothing is one error and one non-error observation, for example - final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT); - final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations); - empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE); - } - public synchronized void setEstimatedQReported(final double estimatedQReported) { + if ( estimatedQReported < 0 ) throw new IllegalArgumentException("estimatedQReported < 0"); + if ( Double.isInfinite(estimatedQReported) ) throw new IllegalArgumentException("estimatedQReported is infinite"); + if ( Double.isNaN(estimatedQReported) ) throw new IllegalArgumentException("estimatedQReported is NaN"); + this.estimatedQReported = estimatedQReported; } public final double getEstimatedQReported() { return estimatedQReported; } + public final byte getEstimatedQReportedAsByte() { + return (byte)(int)(Math.round(getEstimatedQReported())); + } public synchronized void setEmpiricalQuality(final double empiricalQuality) { + if ( empiricalQuality < 0 ) throw new IllegalArgumentException("empiricalQuality < 0"); + if ( Double.isInfinite(empiricalQuality) ) throw new IllegalArgumentException("empiricalQuality is infinite"); + if ( Double.isNaN(empiricalQuality) ) throw new IllegalArgumentException("empiricalQuality is NaN"); + this.empiricalQuality = empiricalQuality; } @@ -133,14 +166,6 @@ public class RecalDatum { return String.format("%s,%d,%.2f", toString(), (byte) Math.floor(getEstimatedQReported()), getEmpiricalQuality() - getEstimatedQReported()); } - private double calcExpectedErrors() { - return (double) getNumObservations() * qualToErrorProb(estimatedQReported); - } - - private double qualToErrorProb(final double qual) { - return Math.pow(10.0, qual / -10.0); - } - public static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) { final Random random = new Random(); final int nObservations = random.nextInt(maxObservations); @@ -176,6 +201,7 @@ public class RecalDatum { } public synchronized void setNumObservations(final long numObservations) { + if ( numObservations < 0 ) throw new IllegalArgumentException("numObservations < 0"); this.numObservations = numObservations; empiricalQuality = UNINITIALIZED; } @@ -184,29 +210,67 @@ public class RecalDatum { return numMismatches; } + @Requires({"numMismatches >= 0"}) public synchronized void setNumMismatches(final long numMismatches) { + if ( numMismatches < 0 ) throw new IllegalArgumentException("numMismatches < 0"); this.numMismatches = numMismatches; empiricalQuality = UNINITIALIZED; } + @Requires({"by >= 0"}) public synchronized void incrementNumObservations(final long by) { numObservations += by; empiricalQuality = UNINITIALIZED; } + @Requires({"by >= 0"}) public synchronized void incrementNumMismatches(final long by) { numMismatches += by; empiricalQuality = UNINITIALIZED; } + @Requires({"incObservations >= 0", "incMismatches >= 0"}) + @Ensures({"numObservations == old(numObservations) + incObservations", "numMismatches == old(numMismatches) + incMismatches"}) public synchronized void increment(final long incObservations, final long incMismatches) { incrementNumObservations(incObservations); incrementNumMismatches(incMismatches); } + @Ensures({"numObservations == old(numObservations) + 1", "numMismatches >= old(numMismatches)"}) public synchronized void increment(final boolean isError) { incrementNumObservations(1); if ( isError ) incrementNumMismatches(1); } + + // ------------------------------------------------------------------------------------- + // + // Private implementation helper functions + // + // ------------------------------------------------------------------------------------- + + /** + * Calculate and cache the empirical quality score from mismatches and observations (expensive operation) + */ + @Requires("empiricalQuality == UNINITIALIZED") + @Ensures("empiricalQuality != UNINITIALIZED") + private synchronized final void calcEmpiricalQuality() { + // cache the value so we don't call log over and over again + final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT); + // smoothing is one error and one non-error observation, for example + final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT); + final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations); + empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE); + } + + /** + * calculate the expected number of errors given the estimated Q reported and the number of observations + * in this datum. + * + * @return a positive (potentially fractional) estimate of the number of errors + */ + @Ensures("result >= 0.0") + private double calcExpectedErrors() { + return (double) getNumObservations() * QualityUtils.qualToErrorProb(estimatedQReported); + } } diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java index af4891856..76e25a3c0 100755 --- a/public/java/test/org/broadinstitute/sting/BaseTest.java +++ b/public/java/test/org/broadinstitute/sting/BaseTest.java @@ -282,12 +282,12 @@ public abstract class BaseTest { private static final double DEFAULT_FLOAT_TOLERANCE = 1e-1; public static final void assertEqualsDoubleSmart(final Object actual, final Double expected) { - Assert.assertTrue(actual instanceof Double); + Assert.assertTrue(actual instanceof Double, "Not a double"); assertEqualsDoubleSmart((double)(Double)actual, (double)expected); } public static final void assertEqualsDoubleSmart(final Object actual, final Double expected, final double tolerance) { - Assert.assertTrue(actual instanceof Double); + Assert.assertTrue(actual instanceof Double, "Not a double"); assertEqualsDoubleSmart((double)(Double)actual, (double)expected, tolerance); } @@ -303,13 +303,13 @@ public abstract class BaseTest { public static final void assertEqualsDoubleSmart(final double actual, final double expected, final double tolerance) { if ( Double.isNaN(expected) ) // NaN == NaN => false unfortunately - Assert.assertTrue(Double.isNaN(actual)); + Assert.assertTrue(Double.isNaN(actual), "expected is nan, actual is not"); else if ( Double.isInfinite(expected) ) // NaN == NaN => false unfortunately - Assert.assertTrue(Double.isInfinite(actual)); + Assert.assertTrue(Double.isInfinite(actual), "expected is infinite, actual is not"); else { final double delta = Math.abs(actual - expected); final double ratio = Math.abs(actual / expected - 1.0); - Assert.assertTrue(delta < tolerance || ratio < tolerance); + Assert.assertTrue(delta < tolerance || ratio < tolerance, "expected = " + expected + " actual = " + actual + " not within tolerance " + tolerance); } } } From 315d25409f34f409022c7ccb0271be83cb3b6900 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sun, 29 Jul 2012 13:13:13 -0400 Subject: [PATCH 033/176] Improvement to RecalDatum and VisualizeContextTree -- Reorganize functions in RecalDatum so that error rate can be computed indepentently. Added unit tests. Removed equals() method, which is a buggy without it's associated implementation for hashcode -- New class RecalDatumTree based on QualIntervals that inherits from RecalDatum but includes the concept of sub data -- VisualizeContextTree now uses RecalDatumTree and can trivially compute the penalty function for merging nodes, which it displays in the graph --- .../sting/utils/recalibration/RecalDatum.java | 85 +++++++++++++------ .../utils/recalibration/RecalDatumTree.java | 76 +++++++++++++++++ 2 files changed, 133 insertions(+), 28 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumTree.java diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java index 6ee5df68b..249422c17 100755 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java @@ -136,6 +136,14 @@ public class RecalDatum { this.estimatedQReported = estimatedQReported; } + public static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) { + final Random random = new Random(); + final int nObservations = random.nextInt(maxObservations); + final int nErrors = random.nextInt(maxErrors); + final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE); + return new RecalDatum(nObservations, nErrors, (byte)qual); + } + public final double getEstimatedQReported() { return estimatedQReported; } @@ -143,6 +151,29 @@ public class RecalDatum { return (byte)(int)(Math.round(getEstimatedQReported())); } + //--------------------------------------------------------------------------------------------------------------- + // + // Empirical quality score -- derived from the num mismatches and observations + // + //--------------------------------------------------------------------------------------------------------------- + + /** + * Returns the error rate (in real space) of this interval, or 0 if there are no obserations + * @return the empirical error rate ~= N errors / N obs + */ + @Ensures("result >= 0.0") + public double getEmpiricalErrorRate() { + if ( numObservations == 0 ) + return 0.0; + else { + // cache the value so we don't call log over and over again + final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT); + // smoothing is one error and one non-error observation, for example + final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT); + return doubleMismatches / doubleObservations; + } + } + public synchronized void setEmpiricalQuality(final double empiricalQuality) { if ( empiricalQuality < 0 ) throw new IllegalArgumentException("empiricalQuality < 0"); if ( Double.isInfinite(empiricalQuality) ) throw new IllegalArgumentException("empiricalQuality is infinite"); @@ -157,6 +188,16 @@ public class RecalDatum { return empiricalQuality; } + public final byte getEmpiricalQualityAsByte() { + return (byte)(Math.round(getEmpiricalQuality())); + } + + //--------------------------------------------------------------------------------------------------------------- + // + // increment methods + // + //--------------------------------------------------------------------------------------------------------------- + @Override public String toString() { return String.format("%d,%d,%d", getNumObservations(), getNumMismatches(), (byte) Math.floor(getEmpiricalQuality())); @@ -166,29 +207,21 @@ public class RecalDatum { return String.format("%s,%d,%.2f", toString(), (byte) Math.floor(getEstimatedQReported()), getEmpiricalQuality() - getEstimatedQReported()); } - public static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) { - final Random random = new Random(); - final int nObservations = random.nextInt(maxObservations); - final int nErrors = random.nextInt(maxErrors); - final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE); - return new RecalDatum(nObservations, nErrors, (byte)qual); - } - - /** - * We don't compare the estimated quality reported because it may be different when read from - * report tables. - * - * @param o the other recal datum - * @return true if the two recal datums have the same number of observations, errors and empirical quality. - */ - @Override - public boolean equals(Object o) { - if (!(o instanceof RecalDatum)) - return false; - RecalDatum other = (RecalDatum) o; - return super.equals(o) && - MathUtils.compareDoubles(this.empiricalQuality, other.empiricalQuality, 0.001) == 0; - } +// /** +// * We don't compare the estimated quality reported because it may be different when read from +// * report tables. +// * +// * @param o the other recal datum +// * @return true if the two recal datums have the same number of observations, errors and empirical quality. +// */ +// @Override +// public boolean equals(Object o) { +// if (!(o instanceof RecalDatum)) +// return false; +// RecalDatum other = (RecalDatum) o; +// return super.equals(o) && +// MathUtils.compareDoubles(this.empiricalQuality, other.empiricalQuality, 0.001) == 0; +// } //--------------------------------------------------------------------------------------------------------------- // @@ -255,11 +288,7 @@ public class RecalDatum { @Requires("empiricalQuality == UNINITIALIZED") @Ensures("empiricalQuality != UNINITIALIZED") private synchronized final void calcEmpiricalQuality() { - // cache the value so we don't call log over and over again - final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT); - // smoothing is one error and one non-error observation, for example - final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT); - final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations); + final double empiricalQual = -10 * Math.log10(getEmpiricalErrorRate()); empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE); } diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumTree.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumTree.java new file mode 100644 index 000000000..210ea53bf --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumTree.java @@ -0,0 +1,76 @@ +package org.broadinstitute.sting.utils.recalibration; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +/** + * A tree of recal datum, where each contains a set of sub datum representing sub-states of the higher level one + * + * @author Mark DePristo + * @since 07/27/12 + */ +public class RecalDatumTree extends RecalDatum { + final Set subnodes; + + protected RecalDatumTree(final long nObservations, final long nErrors, final byte reportedQual) { + this(nObservations, nErrors, reportedQual, new HashSet()); + } + + public RecalDatumTree(final long nObservations, final long nErrors, final byte reportedQual, final Set subnodes) { + super(nObservations, nErrors, reportedQual); + this.subnodes = new HashSet(subnodes); + } + + public double getPenalty() { + return calcPenalty(getEmpiricalErrorRate()); + } + + public void addSubnode(final RecalDatumTree sub) { + subnodes.add(sub); + } + + public boolean isLeaf() { + return subnodes.isEmpty(); + } + + /** + * Calculate the penalty of this interval, given the overall error rate for the interval + * + * If the globalErrorRate is e, this value is: + * + * sum_i |log10(e_i) - log10(e)| * nObservations_i + * + * each the index i applies to all leaves of the tree accessible from this interval + * (found recursively from subnodes as necessary) + * + * @param globalErrorRate overall error rate in real space against which we calculate the penalty + * @return the cost of approximating the bins in this interval with the globalErrorRate + */ + @Requires("globalErrorRate >= 0.0") + @Ensures("result >= 0.0") + private double calcPenalty(final double globalErrorRate) { + if ( globalErrorRate == 0.0 ) // there were no observations, so there's no penalty + return 0.0; + + if ( isLeaf() ) { + // this is leave node + return (Math.abs(Math.log10(getEmpiricalErrorRate()) - Math.log10(globalErrorRate))) * getNumObservations(); + // TODO -- how we can generalize this calculation? +// if ( this.qEnd <= minInterestingQual ) +// // It's free to merge up quality scores below the smallest interesting one +// return 0; +// else { +// return (Math.abs(Math.log10(getEmpiricalErrorRate()) - Math.log10(globalErrorRate))) * getNumObservations(); +// } + } else { + double sum = 0; + for ( final RecalDatumTree hrd : subnodes) + sum += hrd.calcPenalty(globalErrorRate); + return sum; + } + } +} From 93640b382ebe79d269ada3c006530d8a3f49330f Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 30 Jul 2012 08:31:38 -0400 Subject: [PATCH 034/176] Preliminary version of adaptive context covariate algorithm -- Works according to visual inspection of output tree --- .../utils/recalibration/RecalDatumNode.java | 213 ++++++++++++++++++ .../utils/recalibration/RecalDatumTree.java | 76 ------- 2 files changed, 213 insertions(+), 76 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java delete mode 100644 public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumTree.java diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java new file mode 100644 index 000000000..62ea67d7c --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java @@ -0,0 +1,213 @@ +package org.broadinstitute.sting.utils.recalibration; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.collections.Pair; + +import java.util.HashSet; +import java.util.Set; + +/** + * A tree of recal datum, where each contains a set of sub datum representing sub-states of the higher level one + * + * @author Mark DePristo + * @since 07/27/12 + */ +public class RecalDatumNode { + protected static Logger logger = Logger.getLogger(RecalDatumNode.class); + private final static double UNINITIALIZED = -1.0; + private final T recalDatum; + private double fixedPenalty = UNINITIALIZED; + private final Set> subnodes; + + public RecalDatumNode(final T recalDatum) { + this(recalDatum, new HashSet>()); + } + + @Override + public String toString() { + return recalDatum.toString(); + } + + public RecalDatumNode(final T recalDatum, final Set> subnodes) { + this(recalDatum, UNINITIALIZED, subnodes); + } + + protected RecalDatumNode(final T recalDatum, final double fixedPenalty) { + this(recalDatum, fixedPenalty, new HashSet>()); + } + + protected RecalDatumNode(final T recalDatum, final double fixedPenalty, final Set> subnodes) { + this.recalDatum = recalDatum; + this.fixedPenalty = fixedPenalty; + this.subnodes = new HashSet>(subnodes); + } + + public T getRecalDatum() { + return recalDatum; + } + + public Set> getSubnodes() { + return subnodes; + } + + public double getPenalty() { + if ( fixedPenalty != UNINITIALIZED ) + return fixedPenalty; + else + return calcPenalty(recalDatum.getEmpiricalErrorRate()); + } + + public double calcAndSetFixedPenalty(final boolean doEntireTree) { + fixedPenalty = calcPenalty(recalDatum.getEmpiricalErrorRate()); + if ( doEntireTree ) + for ( final RecalDatumNode sub : subnodes ) + sub.calcAndSetFixedPenalty(doEntireTree); + return fixedPenalty; + } + + public void addSubnode(final RecalDatumNode sub) { + subnodes.add(sub); + } + + public boolean isLeaf() { + return subnodes.isEmpty(); + } + + public int getNumBranches() { + return subnodes.size(); + } + + public double getMinNodePenalty() { + if ( isLeaf() ) + return Double.MAX_VALUE; + else { + double minPenalty = getPenalty(); + for ( final RecalDatumNode sub : subnodes ) + minPenalty = Math.min(minPenalty, sub.getMinNodePenalty()); + return minPenalty; + } + } + + public int maxDepth() { + int subMax = 0; + for ( final RecalDatumNode sub : subnodes ) + subMax = Math.max(subMax, sub.maxDepth()); + return subMax + 1; + } + + public int size() { + int size = 1; + for ( final RecalDatumNode sub : subnodes ) + size += sub.size(); + return size; + } + + /** + * Calculate the penalty of this interval, given the overall error rate for the interval + * + * If the globalErrorRate is e, this value is: + * + * sum_i |log10(e_i) - log10(e)| * nObservations_i + * + * each the index i applies to all leaves of the tree accessible from this interval + * (found recursively from subnodes as necessary) + * + * @param globalErrorRate overall error rate in real space against which we calculate the penalty + * @return the cost of approximating the bins in this interval with the globalErrorRate + */ + @Requires("globalErrorRate >= 0.0") + @Ensures("result >= 0.0") + private double calcPenalty(final double globalErrorRate) { + if ( globalErrorRate == 0.0 ) // there were no observations, so there's no penalty + return 0.0; + + if ( isLeaf() ) { + // this is leave node + return (Math.abs(Math.log10(recalDatum.getEmpiricalErrorRate()) - Math.log10(globalErrorRate))) * recalDatum.getNumObservations(); + // TODO -- how we can generalize this calculation? +// if ( this.qEnd <= minInterestingQual ) +// // It's free to merge up quality scores below the smallest interesting one +// return 0; +// else { +// return (Math.abs(Math.log10(getEmpiricalErrorRate()) - Math.log10(globalErrorRate))) * getNumObservations(); +// } + } else { + double sum = 0; + for ( final RecalDatumNode hrd : subnodes) + sum += hrd.calcPenalty(globalErrorRate); + return sum; + } + } + + public RecalDatumNode pruneToDepth(final int maxDepth) { + if ( maxDepth < 1 ) + throw new IllegalArgumentException("maxDepth < 1"); + else { + final Set> subPruned = new HashSet>(getNumBranches()); + if ( maxDepth > 1 ) + for ( final RecalDatumNode sub : subnodes ) + subPruned.add(sub.pruneToDepth(maxDepth - 1)); + return new RecalDatumNode(getRecalDatum(), fixedPenalty, subPruned); + } + } + + public RecalDatumNode pruneByPenalty(final int maxElements) { + RecalDatumNode root = this; + + while ( root.size() > maxElements ) { + // remove the lowest penalty element, and continue + root = root.removeLowestPenaltyNode(); + } + + // our size is below the target, so we are good, return + return root; + } + + /** + * Find the lowest penalty node in the tree, and return a tree without it + * + * Note this excludes the current (root) node + * + * @return + */ + private RecalDatumNode removeLowestPenaltyNode() { + final RecalDatumNode oneRemoved = removeFirstNodeWithPenalty(getMinNodePenalty()).getFirst(); + if ( oneRemoved == null ) + throw new IllegalStateException("Removed our root node, wow, didn't expect that"); + return oneRemoved; + } + + private Pair, Boolean> removeFirstNodeWithPenalty(final double penaltyToRemove) { + if ( getPenalty() == penaltyToRemove ) { + logger.info("Removing " + this + " with penalty " + penaltyToRemove); + if ( isLeaf() ) + throw new IllegalStateException("Trying to remove a leaf node from the tree! " + this + " " + penaltyToRemove); + // node is the thing we are going to remove, but without any subnodes + final RecalDatumNode node = new RecalDatumNode(getRecalDatum(), fixedPenalty); + return new Pair, Boolean>(node, true); + } else { + // did we remove something in a sub branch? + boolean removedSomething = false; + + // our sub nodes with the penalty node removed + final Set> sub = new HashSet>(getNumBranches()); + + for ( final RecalDatumNode sub1 : subnodes ) { + if ( removedSomething ) { + // already removed something, just add sub1 back to sub + sub.add(sub1); + } else { + // haven't removed anything yet, so try + final Pair, Boolean> maybeRemoved = sub1.removeFirstNodeWithPenalty(penaltyToRemove); + removedSomething = maybeRemoved.getSecond(); + sub.add(maybeRemoved.getFirst()); + } + } + + final RecalDatumNode node = new RecalDatumNode(getRecalDatum(), fixedPenalty, sub); + return new Pair, Boolean>(node, removedSomething); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumTree.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumTree.java deleted file mode 100644 index 210ea53bf..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumTree.java +++ /dev/null @@ -1,76 +0,0 @@ -package org.broadinstitute.sting.utils.recalibration; - -import com.google.java.contract.Ensures; -import com.google.java.contract.Requires; - -import java.util.Collections; -import java.util.HashSet; -import java.util.Set; - -/** - * A tree of recal datum, where each contains a set of sub datum representing sub-states of the higher level one - * - * @author Mark DePristo - * @since 07/27/12 - */ -public class RecalDatumTree extends RecalDatum { - final Set subnodes; - - protected RecalDatumTree(final long nObservations, final long nErrors, final byte reportedQual) { - this(nObservations, nErrors, reportedQual, new HashSet()); - } - - public RecalDatumTree(final long nObservations, final long nErrors, final byte reportedQual, final Set subnodes) { - super(nObservations, nErrors, reportedQual); - this.subnodes = new HashSet(subnodes); - } - - public double getPenalty() { - return calcPenalty(getEmpiricalErrorRate()); - } - - public void addSubnode(final RecalDatumTree sub) { - subnodes.add(sub); - } - - public boolean isLeaf() { - return subnodes.isEmpty(); - } - - /** - * Calculate the penalty of this interval, given the overall error rate for the interval - * - * If the globalErrorRate is e, this value is: - * - * sum_i |log10(e_i) - log10(e)| * nObservations_i - * - * each the index i applies to all leaves of the tree accessible from this interval - * (found recursively from subnodes as necessary) - * - * @param globalErrorRate overall error rate in real space against which we calculate the penalty - * @return the cost of approximating the bins in this interval with the globalErrorRate - */ - @Requires("globalErrorRate >= 0.0") - @Ensures("result >= 0.0") - private double calcPenalty(final double globalErrorRate) { - if ( globalErrorRate == 0.0 ) // there were no observations, so there's no penalty - return 0.0; - - if ( isLeaf() ) { - // this is leave node - return (Math.abs(Math.log10(getEmpiricalErrorRate()) - Math.log10(globalErrorRate))) * getNumObservations(); - // TODO -- how we can generalize this calculation? -// if ( this.qEnd <= minInterestingQual ) -// // It's free to merge up quality scores below the smallest interesting one -// return 0; -// else { -// return (Math.abs(Math.log10(getEmpiricalErrorRate()) - Math.log10(globalErrorRate))) * getNumObservations(); -// } - } else { - double sum = 0; - for ( final RecalDatumTree hrd : subnodes) - sum += hrd.calcPenalty(globalErrorRate); - return sum; - } - } -} From 0c4e729e13d0ce5e24dc28cc14664dd1519632a9 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 30 Jul 2012 15:44:33 -0400 Subject: [PATCH 035/176] Working version of adaptive context calculations -- Uses chi2 test for independences to determine if subcontext is worth representing. Give excellent visual results -- Writes out analysis output file producing excellent results in R -- Trivial reformatting of MathUtils --- .../utils/recalibration/RecalDatumNode.java | 128 +++++++++++++++--- 1 file changed, 112 insertions(+), 16 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java index 62ea67d7c..3af91be16 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java @@ -2,7 +2,9 @@ package org.broadinstitute.sting.utils.recalibration; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; +import org.apache.commons.math.stat.inference.ChiSquareTestImpl; import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.collections.Pair; import java.util.HashSet; @@ -15,8 +17,9 @@ import java.util.Set; * @since 07/27/12 */ public class RecalDatumNode { + private final static boolean USE_CHI2 = true; protected static Logger logger = Logger.getLogger(RecalDatumNode.class); - private final static double UNINITIALIZED = -1.0; + private final static double UNINITIALIZED = Double.NEGATIVE_INFINITY; private final T recalDatum; private double fixedPenalty = UNINITIALIZED; private final Set> subnodes; @@ -56,11 +59,11 @@ public class RecalDatumNode { if ( fixedPenalty != UNINITIALIZED ) return fixedPenalty; else - return calcPenalty(recalDatum.getEmpiricalErrorRate()); + return calcPenalty(); } public double calcAndSetFixedPenalty(final boolean doEntireTree) { - fixedPenalty = calcPenalty(recalDatum.getEmpiricalErrorRate()); + fixedPenalty = calcPenalty(); if ( doEntireTree ) for ( final RecalDatumNode sub : subnodes ) sub.calcAndSetFixedPenalty(doEntireTree); @@ -79,14 +82,23 @@ public class RecalDatumNode { return subnodes.size(); } - public double getMinNodePenalty() { + /** + * Total penalty is the sum of leaf node penalties + * + * This algorithm assumes that penalties have been fixed before pruning, as leaf nodes by + * definition have 0 penalty unless they represent a pruned tree with underlying -- but now + * pruned -- subtrees + * + * @return + */ + public double totalPenalty() { if ( isLeaf() ) - return Double.MAX_VALUE; + return getPenalty(); else { - double minPenalty = getPenalty(); + double sum = 0.0; for ( final RecalDatumNode sub : subnodes ) - minPenalty = Math.min(minPenalty, sub.getMinNodePenalty()); - return minPenalty; + sum += sub.totalPenalty(); + return sum; } } @@ -97,6 +109,17 @@ public class RecalDatumNode { return subMax + 1; } + public int minDepth() { + if ( isLeaf() ) + return 1; + else { + int subMin = Integer.MAX_VALUE; + for ( final RecalDatumNode sub : subnodes ) + subMin = Math.min(subMin, sub.minDepth()); + return subMin + 1; + } + } + public int size() { int size = 1; for ( final RecalDatumNode sub : subnodes ) @@ -104,6 +127,58 @@ public class RecalDatumNode { return size; } + public int numLeaves() { + if ( isLeaf() ) + return 1; + else { + int size = 0; + for ( final RecalDatumNode sub : subnodes ) + size += sub.numLeaves(); + return size; + } + } + + private double calcPenalty() { + if ( USE_CHI2 ) + return calcPenaltyChi2(); + else + return calcPenaltyLog10(getRecalDatum().getEmpiricalErrorRate()); + } + + private double calcPenaltyChi2() { + if ( isLeaf() ) + return 0.0; + else { + final long[][] counts = new long[subnodes.size()][2]; + + int i = 0; + for ( RecalDatumNode subnode : subnodes ) { + counts[i][0] = subnode.getRecalDatum().getNumMismatches(); + counts[i][1] = subnode.getRecalDatum().getNumObservations(); + i++; + } + + final double chi2 = new ChiSquareTestImpl().chiSquare(counts); + +// StringBuilder x = new StringBuilder(); +// StringBuilder y = new StringBuilder(); +// for ( int k = 0; k < counts.length; k++) { +// if ( k != 0 ) { +// x.append(", "); +// y.append(", "); +// } +// x.append(counts[k][0]); +// y.append(counts[k][1]); +// } +// logger.info("x = c(" + x.toString() + ")"); +// logger.info("y = c(" + y.toString() + ")"); +// logger.info("chi2 = " + chi2); + + return chi2; + //return Math.log10(chi2); + } + } + /** * Calculate the penalty of this interval, given the overall error rate for the interval * @@ -119,7 +194,7 @@ public class RecalDatumNode { */ @Requires("globalErrorRate >= 0.0") @Ensures("result >= 0.0") - private double calcPenalty(final double globalErrorRate) { + private double calcPenaltyLog10(final double globalErrorRate) { if ( globalErrorRate == 0.0 ) // there were no observations, so there's no penalty return 0.0; @@ -136,7 +211,7 @@ public class RecalDatumNode { } else { double sum = 0; for ( final RecalDatumNode hrd : subnodes) - sum += hrd.calcPenalty(globalErrorRate); + sum += hrd.calcPenaltyLog10(globalErrorRate); return sum; } } @@ -173,17 +248,38 @@ public class RecalDatumNode { * @return */ private RecalDatumNode removeLowestPenaltyNode() { - final RecalDatumNode oneRemoved = removeFirstNodeWithPenalty(getMinNodePenalty()).getFirst(); + final Pair, Double> nodeToRemove = getMinPenaltyNode(); + logger.info("Removing " + nodeToRemove.getFirst() + " with penalty " + nodeToRemove.getSecond()); + + final Pair, Boolean> result = removeNode(nodeToRemove.getFirst()); + + if ( ! result.getSecond() ) + throw new IllegalStateException("Never removed any node!"); + + final RecalDatumNode oneRemoved = result.getFirst(); if ( oneRemoved == null ) throw new IllegalStateException("Removed our root node, wow, didn't expect that"); return oneRemoved; } - private Pair, Boolean> removeFirstNodeWithPenalty(final double penaltyToRemove) { - if ( getPenalty() == penaltyToRemove ) { - logger.info("Removing " + this + " with penalty " + penaltyToRemove); + private Pair, Double> getMinPenaltyNode() { + final double myValue = isLeaf() ? Double.MAX_VALUE : getPenalty(); + Pair, Double> maxNode = new Pair, Double>(this, myValue); + + for ( final RecalDatumNode sub : subnodes ) { + final Pair, Double> subFind = sub.getMinPenaltyNode(); + if ( subFind.getSecond() < maxNode.getSecond() ) { + maxNode = subFind; + } + } + + return maxNode; + } + + private Pair, Boolean> removeNode(final RecalDatumNode nodeToRemove) { + if ( this == nodeToRemove ) { if ( isLeaf() ) - throw new IllegalStateException("Trying to remove a leaf node from the tree! " + this + " " + penaltyToRemove); + throw new IllegalStateException("Trying to remove a leaf node from the tree! " + this + " " + nodeToRemove); // node is the thing we are going to remove, but without any subnodes final RecalDatumNode node = new RecalDatumNode(getRecalDatum(), fixedPenalty); return new Pair, Boolean>(node, true); @@ -200,7 +296,7 @@ public class RecalDatumNode { sub.add(sub1); } else { // haven't removed anything yet, so try - final Pair, Boolean> maybeRemoved = sub1.removeFirstNodeWithPenalty(penaltyToRemove); + final Pair, Boolean> maybeRemoved = sub1.removeNode(nodeToRemove); removedSomething = maybeRemoved.getSecond(); sub.add(maybeRemoved.getFirst()); } From dad9bb11920b0877e6bd31cbede5f030a62d1d62 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 30 Jul 2012 15:46:00 -0400 Subject: [PATCH 036/176] Changes order of writing BaseRecalibrator results so that if R blows up you still get a meaningful tree --- .../sting/gatk/walkers/bqsr/BaseRecalibrator.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index e95a0e6c1..ddd75e232 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -275,13 +275,16 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed public void onTraversalDone(Long result) { logger.info("Calculating quantized quality scores..."); quantizeQualityScores(); + + logger.info("Writing recalibration report..."); + generateReport(); + logger.info("...done!"); + if (!RAC.NO_PLOTS) { logger.info("Generating recalibration plots..."); generatePlots(); } - logger.info("Writing recalibration report..."); - generateReport(); - logger.info("...done!"); + logger.info("Processed: " + result + " sites"); } From 762a3d9b50425deefcf20fc7798a5278522b9e23 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 30 Jul 2012 15:47:36 -0400 Subject: [PATCH 037/176] Move BQSR.R to utils/recalibration in R --- .../sting/{gatk/walkers/bqsr => utils/recalibration}/BQSR.R | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename public/R/scripts/org/broadinstitute/sting/{gatk/walkers/bqsr => utils/recalibration}/BQSR.R (100%) diff --git a/public/R/scripts/org/broadinstitute/sting/gatk/walkers/bqsr/BQSR.R b/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R similarity index 100% rename from public/R/scripts/org/broadinstitute/sting/gatk/walkers/bqsr/BQSR.R rename to public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R From f7133ffc319ec56a6a19890d6526f3307972d84b Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 31 Jul 2012 08:10:51 -0400 Subject: [PATCH 038/176] Cleanup syntax errors from BQSR reorganization --- .../src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java index 895de3578..4b56ce309 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java @@ -30,12 +30,12 @@ import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.walkers.bqsr.EventType; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.recalibration.EventType; import java.util.ArrayList; import java.util.Arrays; From 10111450aa20010fb16301006a471ba399f9ae8a Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 31 Jul 2012 15:37:22 -0400 Subject: [PATCH 041/176] Fixed AlignmentUtils bug for handling Ns in the CIGAR string. Added a UG integration test that calls a BAM with such reads (provided by a user on GetSatisfaction). --- .../sting/utils/sam/AlignmentUtils.java | 9 +++------ .../UnifiedGenotyperIntegrationTest.java | 18 ++++++++++++++++-- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java index 4b56ce309..2c388a1e0 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java @@ -476,7 +476,6 @@ public class AlignmentUtils { } break; case D: - case N: if (!isDeletion) { alignmentPos += elementLength; } else { @@ -498,6 +497,7 @@ public class AlignmentUtils { break; case H: case P: + case N: break; default: throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator()); @@ -516,16 +516,13 @@ public class AlignmentUtils { final int elementLength = ce.getLength(); switch (ce.getOperator()) { - case I: - case S: - break; case D: case N: - alignmentLength += elementLength; - break; case M: alignmentLength += elementLength; break; + case I: + case S: case H: case P: break; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index f35eb4404..f26486a71 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -373,13 +373,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // -------------------------------------------------------------------------------------------------------------- // - // testing SnpEff + // testing MinIndelFraction // // -------------------------------------------------------------------------------------------------------------- final static String assessMinIndelFraction = baseCommandIndelsb37 + " -I " + validationDataLocation + "978604.bam -L 1:978,586-978,626 -o %s --sites_only -rf Sample -goodSM 7377 -goodSM 22-0022 -goodSM 134 -goodSM 344029-53 -goodSM 14030"; - + @Test public void testMinIndelFraction0() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -403,4 +403,18 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { Arrays.asList("3f07efb768e08650a7ce333edd4f9a52")); executeTest("test minIndelFraction 1.0", spec); } + + // -------------------------------------------------------------------------------------------------------------- + // + // testing Ns in CIGAR + // + // -------------------------------------------------------------------------------------------------------------- + + @Test + public void testNsInCigar() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + baseCommand + " -I " + validationDataLocation + "testWithNs.bam", 1, + Arrays.asList("22c9fd65ce3298bd7fbf400c9c209f29")); + executeTest("test calling on reads with Ns in CIGAR", spec); + } } From ab53d73459e58032567ab597a871598aab0280fd Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 31 Jul 2012 15:50:32 -0400 Subject: [PATCH 042/176] Quick fix to user error catching --- .../src/org/broadinstitute/sting/gatk/CommandLineGATK.java | 4 ++-- .../walkers/genotyper/UnifiedGenotyperIntegrationTest.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java index 306ebdd0e..312d31727 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java @@ -130,12 +130,12 @@ public class CommandLineGATK extends CommandLineExecutable { // can't close tribble index when writing if ( message.indexOf("Unable to close index for") != -1 ) - exitSystemWithUserError(new UserException(t.getCause().getMessage())); + exitSystemWithUserError(new UserException(t.getCause() == null ? message : t.getCause().getMessage())); // disk is full if ( message.indexOf("No space left on device") != -1 ) exitSystemWithUserError(new UserException(t.getMessage())); - if ( t.getCause().getMessage().indexOf("No space left on device") != -1 ) + if ( t.getCause() != null && t.getCause().getMessage().indexOf("No space left on device") != -1 ) exitSystemWithUserError(new UserException(t.getCause().getMessage())); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index f26486a71..d976e3e22 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -413,7 +413,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testNsInCigar() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - baseCommand + " -I " + validationDataLocation + "testWithNs.bam", 1, + "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -I " + validationDataLocation + "testWithNs.bam -o %s -L 8:141799600-141814700", 1, Arrays.asList("22c9fd65ce3298bd7fbf400c9c209f29")); executeTest("test calling on reads with Ns in CIGAR", spec); } From 6cb10cef9638fe4d57cfe6168cb11fa3be8c2230 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 31 Jul 2012 16:00:36 -0400 Subject: [PATCH 043/176] Fixed older GS reported bug. Actually, the problem really lies in Picard (can't set max records in RAM without it throwing an exception, reported on their JIRA) so I just masked out the problem by removing this never-used argument from this rarely-used tool. --- .../gatk/io/storage/SAMFileWriterStorage.java | 1 + .../sting/gatk/walkers/indels/LeftAlignIndels.java | 14 +------------- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java index cb8786be1..300e801e6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java @@ -62,6 +62,7 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage { @Output(required=false, doc="Output bam") protected StingSAMFileWriter writer = null; - /** - * If set too low, the tool may run out of system file descriptors needed to perform sorting; if too high, the tool - * may run out of memory. We recommend that you additionally tell Java to use a temp directory with plenty of available - * space (by setting java.io.tempdir on the command-line). - */ - @Argument(fullName="maxReadsInRam", shortName="maxInRam", doc="max reads allowed to be kept in memory at a time by the output writer", required=false) - protected int MAX_RECORDS_IN_RAM = 500000; - - public void initialize() { - // set up the output writer - if ( writer != null ) - writer.setMaxRecordsInRam(MAX_RECORDS_IN_RAM); - } + public void initialize() {} private void emit(final SAMRecord read) { if ( writer != null ) From 4a23f3cd110d59f1f2cd1e82b72c15c7fdda0f2a Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Tue, 31 Jul 2012 16:34:20 -0400 Subject: [PATCH 044/176] Simple cleanup of pool caller code - since usage is much more general than just calling pools, AF calculation models and GL calculation models are renamed from Pool -> GeneralPloidy. Also, don't have users specify special arguments for -glm and -pnrm. Instead, when running UG with sample ploidy != 2, the correct general ploidy modules are automatically detected and loaded. -glm now reverts to old [SNP|INDEL|BOTH] usage --- ...GeneralPloidyExactAFCalculationModel.java} | 22 +++++++-------- ... => GeneralPloidyGenotypeLikelihoods.java} | 14 +++++----- ...yGenotypeLikelihoodsCalculationModel.java} | 14 +++++----- ...eneralPloidyIndelGenotypeLikelihoods.java} | 18 ++++++------- ...lGenotypeLikelihoodsCalculationModel.java} | 10 +++---- ... GeneralPloidySNPGenotypeLikelihoods.java} | 8 +++--- ...PGenotypeLikelihoodsCalculationModel.java} | 8 +++--- ...eralPloidyAFCalculationModelUnitTest.java} | 6 ++--- ...ralPloidyGenotypeLikelihoodsUnitTest.java} | 27 +++++++++---------- ...enotyperGeneralPloidyIntegrationTest.java} | 16 +++++------ .../AlleleFrequencyCalculationModel.java | 3 +-- .../GenotypeLikelihoodsCalculationModel.java | 7 +++-- .../walkers/genotyper/UnifiedGenotyper.java | 3 ++- .../genotyper/UnifiedGenotyperEngine.java | 27 ++++++++++++------- 14 files changed, 93 insertions(+), 90 deletions(-) rename protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/{PoolAFCalculationModel.java => GeneralPloidyExactAFCalculationModel.java} (95%) rename protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/{PoolGenotypeLikelihoods.java => GeneralPloidyGenotypeLikelihoods.java} (97%) rename protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/{PoolGenotypeLikelihoodsCalculationModel.java => GeneralPloidyGenotypeLikelihoodsCalculationModel.java} (94%) rename protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/{PoolIndelGenotypeLikelihoods.java => GeneralPloidyIndelGenotypeLikelihoods.java} (92%) rename protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/{PoolIndelGenotypeLikelihoodsCalculationModel.java => GeneralPloidyIndelGenotypeLikelihoodsCalculationModel.java} (90%) rename protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/{PoolSNPGenotypeLikelihoods.java => GeneralPloidySNPGenotypeLikelihoods.java} (97%) rename protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/{PoolSNPGenotypeLikelihoodsCalculationModel.java => GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java} (91%) rename protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/{PoolAFCalculationModelUnitTest.java => GeneralPloidyAFCalculationModelUnitTest.java} (96%) rename protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/{PoolGenotypeLikelihoodsUnitTest.java => GeneralPloidyGenotypeLikelihoodsUnitTest.java} (93%) rename protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/{PoolCallerIntegrationTest.java => UnifiedGenotyperGeneralPloidyIntegrationTest.java} (88%) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolAFCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculationModel.java similarity index 95% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolAFCalculationModel.java rename to protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculationModel.java index b8be24cad..ba19638e0 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolAFCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculationModel.java @@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.variantcontext.*; import java.io.PrintStream; import java.util.*; -public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel { +public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalculationModel { static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 10; // if PL vectors longer than this # of elements, don't log them final protected UnifiedArgumentCollection UAC; @@ -42,7 +42,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel { private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6 private final static boolean VERBOSE = false; - protected PoolAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) { + protected GeneralPloidyExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) { super(UAC, N, logger, verboseWriter); ploidy = UAC.samplePloidy; this.UAC = UAC; @@ -140,7 +140,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel { for ( final double[] likelihoods : GLs ) { final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods); - final int[] acCount = PoolGenotypeLikelihoods.getAlleleCountFromPLIndex(1+numOriginalAltAlleles,ploidy,PLindexOfBestGL); + final int[] acCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(1 + numOriginalAltAlleles, ploidy, PLindexOfBestGL); // by convention, first count coming from getAlleleCountFromPLIndex comes from reference allele for (int k=1; k < acCount.length;k++) { if (acCount[k] > 0) @@ -238,7 +238,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel { return newPool; } - // todo - refactor, function almost identical except for log10LofK computation in PoolGenotypeLikelihoods + // todo - refactor, function almost identical except for log10LofK computation in GeneralPloidyGenotypeLikelihoods /** * * @param set ExactACset holding conformation to be computed @@ -301,7 +301,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel { continue; - PoolGenotypeLikelihoods.updateACset(ACcountsClone, ACqueue, indexesToACset); + GeneralPloidyGenotypeLikelihoods.updateACset(ACcountsClone, ACqueue, indexesToACset); } @@ -341,14 +341,14 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel { // Say L1(K) = Pr(D|AC1=K) * choose(m1,K) // and L2(K) = Pr(D|AC2=K) * choose(m2,K) - PoolGenotypeLikelihoods.SumIterator firstIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy1); + GeneralPloidyGenotypeLikelihoods.SumIterator firstIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy1); final double[] x = originalPool.getLikelihoodsAsVector(true); while(firstIterator.hasNext()) { x[firstIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy1,firstIterator.getCurrentVector()); firstIterator.next(); } - PoolGenotypeLikelihoods.SumIterator secondIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy2); + GeneralPloidyGenotypeLikelihoods.SumIterator secondIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2); final double[] y = yy.clone(); while(secondIterator.hasNext()) { y[secondIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy2,secondIterator.getCurrentVector()); @@ -357,7 +357,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel { // initialize output to -log10(choose(m1+m2,[k1 k2...]) final int outputDim = GenotypeLikelihoods.numLikelihoods(numAlleles, newPloidy); - final PoolGenotypeLikelihoods.SumIterator outputIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,newPloidy); + final GeneralPloidyGenotypeLikelihoods.SumIterator outputIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,newPloidy); // Now, result(K) = logSum_G (L1(G)+L2(K-G)) where G are all possible vectors that sum UP to K @@ -419,7 +419,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel { double denom = -MathUtils.log10MultinomialCoefficient(newPloidy, currentCount); // for current conformation, get all possible ways to break vector K into two components G1 and G2 - final PoolGenotypeLikelihoods.SumIterator innerIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy2); + final GeneralPloidyGenotypeLikelihoods.SumIterator innerIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2); set.log10Likelihoods[0] = Double.NEGATIVE_INFINITY; while (innerIterator.hasNext()) { // check if breaking current conformation into g1 and g2 is feasible. @@ -617,7 +617,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel { if ( numOriginalAltAlleles == numNewAltAlleles) { newLikelihoods = originalLikelihoods; } else { - newLikelihoods = PoolGenotypeLikelihoods.subsetToAlleles(originalLikelihoods, ploidy, vc.getAlleles(),allelesToUse); + newLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(originalLikelihoods, ploidy, vc.getAlleles(), allelesToUse); // might need to re-normalize newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true); @@ -668,7 +668,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel { // find the genotype with maximum likelihoods final int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods); - final int[] mlAlleleCount = PoolGenotypeLikelihoods.getAlleleCountFromPLIndex(allelesToUse.size(), numChromosomes, PLindex); + final int[] mlAlleleCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(allelesToUse.size(), numChromosomes, PLindex); final ArrayList alleleFreqs = new ArrayList(); final ArrayList alleleCounts = new ArrayList(); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoods.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoods.java similarity index 97% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoods.java rename to protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoods.java index 438acbacd..6b0831323 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoods.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoods.java @@ -37,7 +37,7 @@ import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods; import java.util.*; -public abstract class PoolGenotypeLikelihoods { +public abstract class GeneralPloidyGenotypeLikelihoods { protected final int numChromosomes; private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6 @@ -67,8 +67,8 @@ public abstract class PoolGenotypeLikelihoods { private static final boolean FAST_GL_COMPUTATION = true; // constructor with given logPL elements - public PoolGenotypeLikelihoods(final List alleles, final double[] logLikelihoods, final int ploidy, - final HashMap perLaneErrorModels, final boolean ignoreLaneInformation) { + public GeneralPloidyGenotypeLikelihoods(final List alleles, final double[] logLikelihoods, final int ploidy, + final HashMap perLaneErrorModels, final boolean ignoreLaneInformation) { this.alleles = alleles; this.nAlleles = alleles.size(); numChromosomes = ploidy; @@ -101,7 +101,7 @@ public abstract class PoolGenotypeLikelihoods { Arrays.fill(log10Likelihoods, MIN_LIKELIHOOD); } else { if (logLikelihoods.length != likelihoodDim) - throw new ReviewedStingException("BUG: inconsistent parameters when creating PoolGenotypeLikelihoods object"); + throw new ReviewedStingException("BUG: inconsistent parameters when creating GeneralPloidyGenotypeLikelihoods object"); log10Likelihoods = logLikelihoods; //.clone(); // is clone needed? } @@ -174,7 +174,7 @@ public abstract class PoolGenotypeLikelihoods { final int numAlleles = currentState.length; final int ploidy = restrictSumTo; - linearIndex = PoolGenotypeLikelihoods.getLinearIndex(stateVector, numAlleles, ploidy); + linearIndex = GeneralPloidyGenotypeLikelihoods.getLinearIndex(stateVector, numAlleles, ploidy); } else throw new ReviewedStingException("BUG: Not supported"); @@ -308,7 +308,7 @@ public abstract class PoolGenotypeLikelihoods { public static double[] subsetToAlleles(final double[] oldLikelihoods, final int numChromosomes, final List originalAlleles, final List allelesToSubset) { - int newPLSize = PoolGenotypeLikelihoods.getNumLikelihoodElements(allelesToSubset.size(), numChromosomes); + int newPLSize = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(allelesToSubset.size(), numChromosomes); double[] newPLs = new double[newPLSize]; @@ -357,7 +357,7 @@ public abstract class PoolGenotypeLikelihoods { newCount[idx] = pVec[permutationKey[idx]]; // get corresponding index from new count - int outputIdx = PoolGenotypeLikelihoods.getLinearIndex(newCount, allelesToSubset.size(), numChromosomes); + int outputIdx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(newCount, allelesToSubset.size(), numChromosomes); newPLs[outputIdx] = pl; if (VERBOSE) { System.out.println("Old Key:"+Arrays.toString(pVec)); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java similarity index 94% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsCalculationModel.java rename to protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java index 685091678..f6ce818be 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java @@ -39,7 +39,7 @@ import org.broadinstitute.sting.utils.variantcontext.*; import java.util.*; -public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel { +public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel { //protected Set laneIDs; public enum Model { @@ -52,7 +52,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi final protected UnifiedArgumentCollection UAC; - protected PoolGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { + protected GeneralPloidyGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { super(UAC,logger); this.UAC = UAC; @@ -137,11 +137,11 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi protected static class PoolGenotypeData { public final String name; - public final PoolGenotypeLikelihoods GL; + public final GeneralPloidyGenotypeLikelihoods GL; public final int depth; public final List alleles; - public PoolGenotypeData(final String name, final PoolGenotypeLikelihoods GL, final int depth, final List alleles) { + public PoolGenotypeData(final String name, final GeneralPloidyGenotypeLikelihoods GL, final int depth, final List alleles) { this.name = name; this.GL = GL; this.depth = depth; @@ -236,7 +236,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup(); // create the GenotypeLikelihoods object - final PoolGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO); + final GeneralPloidyGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO); // actually compute likelihoods final int nGoodBases = GL.add(pileup, UAC); if ( nGoodBases > 0 ) @@ -268,7 +268,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi for ( PoolGenotypeData sampleData : GLs ) { // extract from multidimensional array - final double[] myLikelihoods = PoolGenotypeLikelihoods.subsetToAlleles(sampleData.GL.getLikelihoods(),sampleData.GL.numChromosomes, + final double[] myLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(sampleData.GL.getLikelihoods(), sampleData.GL.numChromosomes, allAlleles, alleles); // normalize in log space so that max element is zero. @@ -327,7 +327,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi Abstract methods - must be implemented in derived classes */ - protected abstract PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List alleles, + protected abstract GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List alleles, final double[] logLikelihoods, final int ploidy, final HashMap perLaneErrorModels, diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoods.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java similarity index 92% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoods.java rename to protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java index 33b7b8b90..4f42f820e 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoods.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java @@ -18,7 +18,7 @@ import java.util.*; * Time: 10:06 AM * To change this template use File | Settings | File Templates. */ -public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods { +public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotypeLikelihoods { final PairHMMIndelErrorModel pairModel; final LinkedHashMap haplotypeMap; final ReferenceContext refContext; @@ -27,14 +27,14 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods { final byte refBase; - public PoolIndelGenotypeLikelihoods(final List alleles, - final double[] logLikelihoods, - final int ploidy, - final HashMap perLaneErrorModels, - final boolean ignoreLaneInformation, - final PairHMMIndelErrorModel pairModel, - final LinkedHashMap haplotypeMap, - final ReferenceContext referenceContext) { + public GeneralPloidyIndelGenotypeLikelihoods(final List alleles, + final double[] logLikelihoods, + final int ploidy, + final HashMap perLaneErrorModels, + final boolean ignoreLaneInformation, + final PairHMMIndelErrorModel pairModel, + final LinkedHashMap haplotypeMap, + final ReferenceContext referenceContext) { super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation); this.pairModel = pairModel; this.haplotypeMap = haplotypeMap; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoodsCalculationModel.java similarity index 90% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoodsCalculationModel.java rename to protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoodsCalculationModel.java index 1fef76116..5eeadddae 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoodsCalculationModel.java @@ -32,13 +32,11 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.variantcontext.*; import java.util.*; -public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLikelihoodsCalculationModel { +public class GeneralPloidyIndelGenotypeLikelihoodsCalculationModel extends GeneralPloidyGenotypeLikelihoodsCalculationModel { private static final int MAX_NUM_ALLELES_TO_GENOTYPE = 4; private PairHMMIndelErrorModel pairModel; @@ -59,7 +57,7 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi } */ - protected PoolIndelGenotypeLikelihoodsCalculationModel(final UnifiedArgumentCollection UAC, final Logger logger) { + protected GeneralPloidyIndelGenotypeLikelihoodsCalculationModel(final UnifiedArgumentCollection UAC, final Logger logger) { super(UAC, logger); @@ -69,14 +67,14 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi } - protected PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List alleles, + protected GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List alleles, final double[] logLikelihoods, final int ploidy, final HashMap perLaneErrorModels, final boolean useBQAedPileup, final ReferenceContext ref, final boolean ignoreLaneInformation){ - return new PoolIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref); + return new GeneralPloidyIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref); } protected List getInitialAllelesToUse(final RefMetaDataTracker tracker, diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolSNPGenotypeLikelihoods.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoods.java similarity index 97% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolSNPGenotypeLikelihoods.java rename to protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoods.java index f763392ae..944372907 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolSNPGenotypeLikelihoods.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoods.java @@ -23,7 +23,7 @@ import static java.lang.Math.pow; * and posteriors given a pile of bases and quality scores * */ -public class PoolSNPGenotypeLikelihoods extends PoolGenotypeLikelihoods/* implements Cloneable*/ { +public class GeneralPloidySNPGenotypeLikelihoods extends GeneralPloidyGenotypeLikelihoods/* implements Cloneable*/ { final List myAlleles; final int[] alleleIndices; @@ -42,8 +42,8 @@ public class PoolSNPGenotypeLikelihoods extends PoolGenotypeLikelihoods/* implem * @param useBQAedPileup Use BAQed pileup * @param ignoreLaneInformation If true, lane info is ignored */ - public PoolSNPGenotypeLikelihoods(final List alleles, final double[] logLikelihoods, final int ploidy, - final HashMap perLaneErrorModels, final boolean useBQAedPileup,final boolean ignoreLaneInformation) { + public GeneralPloidySNPGenotypeLikelihoods(final List alleles, final double[] logLikelihoods, final int ploidy, + final HashMap perLaneErrorModels, final boolean useBQAedPileup, final boolean ignoreLaneInformation) { super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation); this.useBAQedPileup = useBQAedPileup; @@ -52,7 +52,7 @@ public class PoolSNPGenotypeLikelihoods extends PoolGenotypeLikelihoods/* implem Allele refAllele = alleles.get(0); //sanity check: by construction, first allele should ALWAYS be the reference alleles if (!refAllele.isReference()) - throw new ReviewedStingException("BUG: First allele in list passed to PoolSNPGenotypeLikelihoods should be reference!"); + throw new ReviewedStingException("BUG: First allele in list passed to GeneralPloidySNPGenotypeLikelihoods should be reference!"); refByte = refAllele.getBases()[0]; // by construction, first allele in list is always ref! diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolSNPGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java similarity index 91% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolSNPGenotypeLikelihoodsCalculationModel.java rename to protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java index 61f505445..30d614455 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolSNPGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java @@ -35,22 +35,22 @@ import org.broadinstitute.sting.utils.variantcontext.*; import java.util.*; -public class PoolSNPGenotypeLikelihoodsCalculationModel extends PoolGenotypeLikelihoodsCalculationModel { +public class GeneralPloidySNPGenotypeLikelihoodsCalculationModel extends GeneralPloidyGenotypeLikelihoodsCalculationModel { - protected PoolSNPGenotypeLikelihoodsCalculationModel( UnifiedArgumentCollection UAC, Logger logger) { + protected GeneralPloidySNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { super(UAC, logger); } - protected PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List alleles, + protected GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List alleles, final double[] logLikelihoods, final int ploidy, final HashMap perLaneErrorModels, final boolean useBQAedPileup, final ReferenceContext ref, final boolean ignoreLaneInformation) { - return new PoolSNPGenotypeLikelihoods(alleles, null, UAC.samplePloidy, perLaneErrorModels, useBQAedPileup, UAC.IGNORE_LANE_INFO); + return new GeneralPloidySNPGenotypeLikelihoods(alleles, null, UAC.samplePloidy, perLaneErrorModels, useBQAedPileup, UAC.IGNORE_LANE_INFO); } protected List getInitialAllelesToUse(final RefMetaDataTracker tracker, diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolAFCalculationModelUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyAFCalculationModelUnitTest.java similarity index 96% rename from protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolAFCalculationModelUnitTest.java rename to protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyAFCalculationModelUnitTest.java index 5a6f7df0f..983f562d2 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolAFCalculationModelUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyAFCalculationModelUnitTest.java @@ -19,7 +19,7 @@ import java.util.Arrays; * Time: 7:44 AM * To change this template use File | Settings | File Templates. */ -public class PoolAFCalculationModelUnitTest extends BaseTest { +public class GeneralPloidyAFCalculationModelUnitTest extends BaseTest { static double[] AA1, AB1, BB1; static double[] AA2, AB2, AC2, BB2, BC2, CC2; @@ -138,10 +138,10 @@ public class PoolAFCalculationModelUnitTest extends BaseTest { public void testGLs(GetGLsTest cfg) { final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(cfg.numAltAlleles); - final int len = PoolGenotypeLikelihoods.getNumLikelihoodElements(1+cfg.numAltAlleles,cfg.ploidy*cfg.GLs.size()); + final int len = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(1 + cfg.numAltAlleles, cfg.ploidy * cfg.GLs.size()); double[] priors = new double[len]; // flat priors - PoolAFCalculationModel.combineSinglePools(cfg.GLs, 1+cfg.numAltAlleles, cfg.ploidy, priors, result); + GeneralPloidyExactAFCalculationModel.combineSinglePools(cfg.GLs, 1 + cfg.numAltAlleles, cfg.ploidy, priors, result); int nameIndex = 1; for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) { int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1)); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsUnitTest.java similarity index 93% rename from protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java rename to protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsUnitTest.java index 5d78dd248..f95ba66b2 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsUnitTest.java @@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import net.sf.samtools.SAMUtils; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.MathUtils; @@ -42,7 +41,7 @@ import java.io.PrintStream; import java.util.*; -public class PoolGenotypeLikelihoodsUnitTest { +public class GeneralPloidyGenotypeLikelihoodsUnitTest { final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection(); final Logger logger = Logger.getLogger(Walker.class); @@ -61,7 +60,7 @@ public class PoolGenotypeLikelihoodsUnitTest { public void testStoringLikelihoodElements() { - // basic test storing a given PL vector in a PoolGenotypeLikelihoods object and then retrieving it back + // basic test storing a given PL vector in a GeneralPloidyGenotypeLikelihoods object and then retrieving it back int ploidy = 20; int numAlleles = 4; @@ -79,7 +78,7 @@ public class PoolGenotypeLikelihoodsUnitTest { for (int k=0; k < gls.length; k++) gls[k]= (double)k; - PoolGenotypeLikelihoods gl = new PoolSNPGenotypeLikelihoods(alleles, gls,ploidy, null, false,true); + GeneralPloidyGenotypeLikelihoods gl = new GeneralPloidySNPGenotypeLikelihoods(alleles, gls,ploidy, null, false,true); double[] glnew = gl.getLikelihoods(); Assert.assertEquals(gls, glnew); @@ -91,7 +90,7 @@ public class PoolGenotypeLikelihoodsUnitTest { for (int ploidy = 2; ploidy < 10; ploidy++) { for (int nAlleles = 2; nAlleles < 10; nAlleles++) - Assert.assertEquals(PoolGenotypeLikelihoods.getNumLikelihoodElements(nAlleles,ploidy), + Assert.assertEquals(GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(nAlleles, ploidy), GenotypeLikelihoods.numLikelihoods(nAlleles, ploidy)); } @@ -103,7 +102,7 @@ public class PoolGenotypeLikelihoodsUnitTest { // create iterator, compare linear index given by iterator with closed form function int numAlleles = 4; int ploidy = 2; - PoolGenotypeLikelihoods.SumIterator iterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles, ploidy); + GeneralPloidyGenotypeLikelihoods.SumIterator iterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles, ploidy); while(iterator.hasNext()) { System.out.format("\n%d:",iterator.getLinearIndex()); @@ -112,7 +111,7 @@ public class PoolGenotypeLikelihoodsUnitTest { System.out.format("%d ",aa); - int computedIdx = PoolGenotypeLikelihoods.getLinearIndex(a, numAlleles, ploidy); + int computedIdx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(a, numAlleles, ploidy); System.out.format("Computed idx = %d\n",computedIdx); iterator.next(); } @@ -141,7 +140,7 @@ public class PoolGenotypeLikelihoodsUnitTest { allelesToSubset.add(Allele.create("A",false)); allelesToSubset.add(Allele.create("C",false)); - double[] newGLs = PoolGenotypeLikelihoods.subsetToAlleles(oldLikelihoods, ploidy, + double[] newGLs = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(oldLikelihoods, ploidy, originalAlleles, allelesToSubset); @@ -171,7 +170,7 @@ public class PoolGenotypeLikelihoodsUnitTest { @Test public void testIndexIterator() { int[] seed = new int[]{1,2,3,4}; - PoolGenotypeLikelihoods.SumIterator iterator = runIterator(seed,-1); + GeneralPloidyGenotypeLikelihoods.SumIterator iterator = runIterator(seed,-1); // Assert.assertTrue(compareIntArrays(iterator.getCurrentVector(), seed)); Assert.assertEquals(iterator.getLinearIndex(),prod(seed)-1); @@ -229,12 +228,12 @@ public class PoolGenotypeLikelihoodsUnitTest { } - private PoolGenotypeLikelihoods.SumIterator runIterator(int[] seed, int restrictSumTo) { - PoolGenotypeLikelihoods.SumIterator iterator = new PoolGenotypeLikelihoods.SumIterator(seed, restrictSumTo); + private GeneralPloidyGenotypeLikelihoods.SumIterator runIterator(int[] seed, int restrictSumTo) { + GeneralPloidyGenotypeLikelihoods.SumIterator iterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(seed, restrictSumTo); while(iterator.hasNext()) { int[] a = iterator.getCurrentVector(); - int idx = PoolGenotypeLikelihoods.getLinearIndex(a, a.length, restrictSumTo); + int idx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(a, a.length, restrictSumTo); if (VERBOSE) { System.out.format("%d:",iterator.getLinearIndex()); for (int i=0; i < seed.length; i++) @@ -454,7 +453,7 @@ public class PoolGenotypeLikelihoodsUnitTest { // get now likelihoods for this - final PoolSNPGenotypeLikelihoods GL = new PoolSNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noiselessErrorModels, false, true); + final GeneralPloidySNPGenotypeLikelihoods GL = new GeneralPloidySNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noiselessErrorModels, false, true); final int nGoodBases = GL.add(alignmentContextMap.get("sample0000").getBasePileup(), true, false, UAC.MIN_BASE_QUALTY_SCORE); if (VERBOSE) { System.out.format("Depth:%d, AC:%d, altDepth:%d, samplesPerPool:%d\nGLs:", depth,ac,altDepth, nSamplesPerPool); @@ -483,7 +482,7 @@ public class PoolGenotypeLikelihoodsUnitTest { // get now likelihoods for this - final PoolSNPGenotypeLikelihoods noisyGL = new PoolSNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noisyErrorModels, false,true); + final GeneralPloidySNPGenotypeLikelihoods noisyGL = new GeneralPloidySNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noisyErrorModels, false,true); noisyGL.add(noisyAlignmentContextMap.get("sample0000").getBasePileup(), true, false, UAC.MIN_BASE_QUALTY_SCORE); mlPair = noisyGL.getMostLikelyACCount(); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java similarity index 88% rename from protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolCallerIntegrationTest.java rename to protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java index acf95c906..9b3103274 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java @@ -12,7 +12,7 @@ import org.testng.annotations.Test; * Time: 11:28 AM * To change this template use File | Settings | File Templates. */ -public class PoolCallerIntegrationTest extends WalkerTest { +public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest { final static String REF = b37KGReference; final String CEUTRIO_BAM = "/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37.list"; final String LSV_BAM = validationDataLocation +"93pools_NA12878_ref_chr20_40m_41m.bam"; @@ -24,21 +24,21 @@ public class PoolCallerIntegrationTest extends WalkerTest { final String NA12878_WG_CALLS = comparisonDataLocation + "Unvalidated/NA12878/CEUTrio.HiSeq.WGS.b37_decoy.recal.ts_95.snp_indel_combined.vcf"; final String LSV_ALLELES = validationDataLocation + "ALL.chr20_40m_41m.largeScaleValidationSites.vcf"; private void PC_MT_Test(String bam, String args, String name, String md5) { - final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm POOLSNP -ignoreLane -pnrm POOL", + final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -ignoreLane ", REF, bam, MTINTERVALS, REFSAMPLE_MT_CALLS, REFSAMPLE_NAME) + " --no_cmdline_in_header -o %s"; final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); executeTest("testPoolCaller:"+name+" args=" + args, spec); } private void PC_LSV_Test(String args, String name, String model, String md5) { - final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane -pnrm POOL", + final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane ", REF, LSV_BAM, LSVINTERVALS, NA12878_WG_CALLS, REFSAMPLE_NAME, model) + " --no_cmdline_in_header -o %s"; final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); executeTest("testPoolCaller:"+name+" args=" + args, spec); } private void PC_LSV_Test_NoRef(String args, String name, String model, String md5) { - final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s -glm %s -ignoreLane -pnrm POOL", + final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s -glm %s -ignoreLane", REF, LSV_BAM, LSVINTERVALS, model) + " --no_cmdline_in_header -o %s"; final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); executeTest("testPoolCaller:"+name+" args=" + args, spec); @@ -46,22 +46,22 @@ public class PoolCallerIntegrationTest extends WalkerTest { @Test public void testBOTH_GGA_Pools() { - PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","POOLBOTH","d76e3b910259da819f1e1b2adc68ba8d"); + PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","d76e3b910259da819f1e1b2adc68ba8d"); } @Test public void testINDEL_GGA_Pools() { - PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","POOLINDEL","ffadcdaee613dab975197bed0fc78da3"); + PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","ffadcdaee613dab975197bed0fc78da3"); } @Test public void testINDEL_maxAlleles2_ploidy3_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","POOLINDEL","96087fe9240e3656cc2a4e0ff0174d5b"); + PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","96087fe9240e3656cc2a4e0ff0174d5b"); } @Test public void testINDEL_maxAlleles2_ploidy1_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","POOLINDEL","6fdae7093831ecfc82a06dd707d62fe9"); + PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","6fdae7093831ecfc82a06dd707d62fe9"); } @Test diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java index 432bbd6d7..08a333486 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java @@ -46,8 +46,7 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { public enum Model { /** The default model with the best performance in all cases */ - EXACT, - POOL + EXACT } protected int N; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java index 4253ff3ad..6fdc926d5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java @@ -59,10 +59,9 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable { public enum Model { SNP, INDEL, - BOTH, - POOLSNP, - POOLINDEL, - POOLBOTH + GeneralPloidySNP, + GeneralPloidyINDEL, + BOTH } public enum GENOTYPING_MODE { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 6b279fd95..c1c1339f5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -241,7 +241,7 @@ public class UnifiedGenotyper extends LocusWalker, Unif } else { // in full mode: check for consistency in ploidy/pool calling arguments // check for correct calculation models - if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) { +/* if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) { // polyploidy requires POOL GL and AF calculation models to be specified right now if (UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLSNP && UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLINDEL && UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLBOTH) { @@ -252,6 +252,7 @@ public class UnifiedGenotyper extends LocusWalker, Unif throw new UserException("Incorrect AF Calculation model. Only POOL model supported if sample ploidy != 2"); } + */ // get all of the unique sample names if (UAC.TREAT_ALL_READS_AS_SINGLE_POOL) { samples.clear(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index f73ab2471..f4bd196ae 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -50,6 +50,7 @@ import java.util.*; public class UnifiedGenotyperEngine { public static final String LOW_QUAL_FILTER_NAME = "LowQual"; + private static final String GPSTRING = "GeneralPloidy"; public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY = "NDA"; @@ -273,7 +274,7 @@ public class UnifiedGenotyperEngine { glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC)); } - return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser); + return glcm.get().get(model.name().toUpperCase()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser); } private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, AlignmentContext rawContext) { @@ -640,6 +641,9 @@ public class UnifiedGenotyperEngine { if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") ) modelPrefix = UAC.GLmodel.name().toUpperCase().replaceAll("BOTH",""); + if (!UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) + modelPrefix = GPSTRING + modelPrefix; + // if we're genotyping given alleles and we have a requested SNP at this position, do SNP if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { final VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles); @@ -648,17 +652,13 @@ public class UnifiedGenotyperEngine { if ( vcInput.isSNP() ) { // ignore SNPs if the user chose INDEL mode only - if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") ) + if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("SNP") ) models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP")); - else if ( UAC.GLmodel.name().toUpperCase().contains("SNP") ) - models.add(UAC.GLmodel); - } + } else if ( vcInput.isIndel() || vcInput.isMixed() ) { // ignore INDELs if the user chose SNP mode only - if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") ) + if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("INDEL") ) models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL")); - else if (UAC.GLmodel.name().toUpperCase().contains("INDEL")) - models.add(UAC.GLmodel); } // No support for other types yet } @@ -668,7 +668,7 @@ public class UnifiedGenotyperEngine { models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL")); } else { - models.add(UAC.GLmodel); + models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+UAC.GLmodel.name().toUpperCase())); } } @@ -730,12 +730,19 @@ public class UnifiedGenotyperEngine { } private static AlleleFrequencyCalculationModel getAlleleFrequencyCalculationObject(int N, Logger logger, PrintStream verboseWriter, UnifiedArgumentCollection UAC) { + List> afClasses = new PluginManager(AlleleFrequencyCalculationModel.class).getPlugins(); + // user-specified name + String afModelName = UAC.AFmodel.name(); + + if (!afModelName.contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) + afModelName = GPSTRING + afModelName; + for (int i = 0; i < afClasses.size(); i++) { Class afClass = afClasses.get(i); String key = afClass.getSimpleName().replace("AFCalculationModel","").toUpperCase(); - if (UAC.AFmodel.name().equalsIgnoreCase(key)) { + if (afModelName.equalsIgnoreCase(key)) { try { Object args[] = new Object[]{UAC,N,logger,verboseWriter}; Constructor c = afClass.getDeclaredConstructor(UnifiedArgumentCollection.class, int.class, Logger.class, PrintStream.class); From a4a41458efc3b3745356b0ddc89fceee4cef2137 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 1 Aug 2012 10:33:41 -0400 Subject: [PATCH 045/176] Update docs of FastaAlternateReferenceMaker as promised in older GS thread --- .../sting/gatk/walkers/fasta/FastaAlternateReference.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java index 92549b821..28cfdd5cd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java @@ -47,7 +47,8 @@ import java.util.List; *

* Given variant tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s). * Additionally, allows for one or more "snpmask" VCFs to set overlapping bases to 'N'. - * Note that if there are multiple variants at a site, it takes the first one seen. + * Note that if there are multiple variants at a site, it chooses one of them randomly. + * Also note that this tool works only for SNPs and for simple indels (but not for things like complex substitutions). * Reference bases for each interval will be output as a separate fasta sequence (named numerically in order). * *

Input

From 459832ee161f88631b56681a0dfdcc9fbe224cf9 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 1 Aug 2012 10:45:04 -0400 Subject: [PATCH 046/176] Fixed bug in FastaAlternateReferenceMaker when input VCF has overlapping deletions as reported a while back on GS --- .../sting/gatk/walkers/fasta/FastaAlternateReference.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java index 28cfdd5cd..8fbd37e30 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReference.java @@ -47,8 +47,10 @@ import java.util.List; *

* Given variant tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s). * Additionally, allows for one or more "snpmask" VCFs to set overlapping bases to 'N'. - * Note that if there are multiple variants at a site, it chooses one of them randomly. - * Also note that this tool works only for SNPs and for simple indels (but not for things like complex substitutions). + * Several important notes: + * 1) if there are multiple variants that start at a site, it chooses one of them randomly. + * 2) when there are overlapping indels (but with different start positions) only the first will be chosen. + * 3) this tool works only for SNPs and for simple indels (but not for things like complex substitutions). * Reference bases for each interval will be output as a separate fasta sequence (named numerically in order). * *

Input

@@ -103,7 +105,7 @@ public class FastaAlternateReference extends FastaReference { String refBase = String.valueOf((char)ref.getBase()); // Check to see if we have a called snp - for ( VariantContext vc : tracker.getValues(variants) ) { + for ( VariantContext vc : tracker.getValues(variants, ref.getLocus()) ) { if ( vc.isFiltered() ) continue; From 2b25df3d538ca4e1197786a500642c35a1b367b1 Mon Sep 17 00:00:00 2001 From: Joel Thibault Date: Mon, 30 Jul 2012 10:19:57 -0400 Subject: [PATCH 047/176] Add removeProgramRecords argument * Add unit test for the removeProgramRecords --- .../sting/gatk/GenomeAnalysisEngine.java | 3 +- .../arguments/GATKArgumentCollection.java | 3 + .../gatk/datasources/reads/SAMDataSource.java | 21 +++++- .../reads/SAMDataSourceUnitTest.java | 73 +++++++++++++++++++ 4 files changed, 95 insertions(+), 5 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 5d6fb75ed..228f8351a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -813,7 +813,8 @@ public class GenomeAnalysisEngine { getWalkerBAQQualityMode(), refReader, getBaseRecalibration(), - argCollection.defaultBaseQualities); + argCollection.defaultBaseQualities, + argCollection.removeProgramRecords); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 3fd3857c5..91a008cbc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -249,6 +249,9 @@ public class GATKArgumentCollection { @Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false) public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT; + @Argument(fullName = "remove_program_records", shortName = "rpr", doc = "Should we remove program records from the SAM header", required = false) + public boolean removeProgramRecords = false; + @Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument.", required = false) public ValidationExclusion.TYPE unsafe; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index 0fa4234b3..7f0a0c4c0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -89,6 +89,11 @@ public class SAMDataSource { */ private final SAMFileReader.ValidationStringency validationStringency; + /** + * Do we want to remove the program records from this data source? + */ + private final boolean removeProgramRecords; + /** * Store BAM indices for each reader present. */ @@ -200,7 +205,8 @@ public class SAMDataSource { BAQ.QualityMode.DONT_MODIFY, null, // no BAQ null, // no BQSR - (byte) -1); + (byte) -1, + false); } /** @@ -233,7 +239,8 @@ public class SAMDataSource { BAQ.QualityMode qmode, IndexedFastaSequenceFile refReader, BaseRecalibration bqsrApplier, - byte defaultBaseQualities) { + byte defaultBaseQualities, + boolean removeProgramRecords) { this.readMetrics = new ReadMetrics(); this.genomeLocParser = genomeLocParser; @@ -249,6 +256,7 @@ public class SAMDataSource { dispatcher = null; validationStringency = strictness; + this.removeProgramRecords = removeProgramRecords; if(readBufferSize != null) ReadShard.setReadBufferSize(readBufferSize); else { @@ -748,7 +756,7 @@ public class SAMDataSource { private synchronized void createNewResource() { if(allResources.size() > maxEntries) throw new ReviewedStingException("Cannot create a new resource pool. All resources are in use."); - SAMReaders readers = new SAMReaders(readerIDs, validationStringency); + SAMReaders readers = new SAMReaders(readerIDs, validationStringency, removeProgramRecords); allResources.add(readers); availableResources.add(readers); } @@ -777,9 +785,11 @@ public class SAMDataSource { /** * Derive a new set of readers from the Reads metadata. * @param readerIDs reads to load. + * TODO: validationStringency is not used here * @param validationStringency validation stringency. + * @param removeProgramRecords indicate whether to clear program records from the readers */ - public SAMReaders(Collection readerIDs, SAMFileReader.ValidationStringency validationStringency) { + public SAMReaders(Collection readerIDs, SAMFileReader.ValidationStringency validationStringency, boolean removeProgramRecords) { final int totalNumberOfFiles = readerIDs.size(); int readerNumber = 1; final SimpleTimer timer = new SimpleTimer().start(); @@ -790,6 +800,9 @@ public class SAMDataSource { long lastTick = timer.currentTime(); for(final SAMReaderID readerID: readerIDs) { final ReaderInitializer init = new ReaderInitializer(readerID).call(); + if (removeProgramRecords) { + init.reader.getFileHeader().setProgramRecords(new ArrayList()); + } if (threadAllocation.getNumIOThreads() > 0) { inputStreams.put(init.readerID, init.blockInputStream); // get from initializer } diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java index 1c5dab254..f2c546317 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java @@ -24,9 +24,12 @@ package org.broadinstitute.sting.gatk.datasources.reads; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; import static org.testng.Assert.fail; import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.samtools.SAMFileReader; +import net.sf.samtools.SAMProgramRecord; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.commandline.Tags; @@ -36,6 +39,7 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.exceptions.UserException; import org.testng.annotations.AfterMethod; @@ -143,4 +147,73 @@ public class SAMDataSourceUnitTest extends BaseTest { fail("testLinearBreakIterateAll: We Should get a UserException.CouldNotReadInputFile exception"); } } + + /** Test that we clear program records when requested */ + @Test + public void testRemoveProgramRecords() { + logger.warn("Executing testRemoveProgramRecords"); + + // setup the data + readers.add(new SAMReaderID(new File(b37GoodBAM),new Tags())); + + // use defaults + SAMDataSource data = new SAMDataSource(readers, + new ThreadAllocation(), + null, + genomeLocParser, + false, + SAMFileReader.ValidationStringency.SILENT, + null, + null, + new ValidationExclusion(), + new ArrayList(), + false); + + List defaultProgramRecords = data.getHeader().getProgramRecords(); + assertTrue(defaultProgramRecords.size() != 0, "testRemoveProgramRecords: No program records found when using default constructor"); + + boolean removeProgramRecords = false; + data = new SAMDataSource(readers, + new ThreadAllocation(), + null, + genomeLocParser, + false, + SAMFileReader.ValidationStringency.SILENT, + null, + null, + new ValidationExclusion(), + new ArrayList(), + false, + BAQ.CalculationMode.OFF, + BAQ.QualityMode.DONT_MODIFY, + null, // no BAQ + null, // no BQSR + (byte) -1, + removeProgramRecords); + + List dontRemoveProgramRecords = data.getHeader().getProgramRecords(); + assertEquals(dontRemoveProgramRecords, defaultProgramRecords, "testRemoveProgramRecords: default program records differ from removeProgramRecords = false"); + + removeProgramRecords = true; + data = new SAMDataSource(readers, + new ThreadAllocation(), + null, + genomeLocParser, + false, + SAMFileReader.ValidationStringency.SILENT, + null, + null, + new ValidationExclusion(), + new ArrayList(), + false, + BAQ.CalculationMode.OFF, + BAQ.QualityMode.DONT_MODIFY, + null, // no BAQ + null, // no BQSR + (byte) -1, + removeProgramRecords); + + List doRemoveProgramRecords = data.getHeader().getProgramRecords(); + assertTrue(doRemoveProgramRecords.isEmpty(), "testRemoveProgramRecords: program records not cleared when removeProgramRecords = true"); + } } From ccac77d888f3a0c2cd5e3d8d2c0e7c13c21596af Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 1 Aug 2012 11:57:16 -0400 Subject: [PATCH 048/176] Bugfix for incorrect allele counting in IndelSummary -- Previous version would count all alt alleles as present in a sample, even if only 1 were present, because of the way VariantEval subsetted VCs -- Updated code for subsetting VCs by sample to be clearer about how it handles rederiving alleles -- Update a few pieces of code to get previous correct behavior -- Updated a few MD5s as now ref calls at sites in dbSNP are counted as having a comp sites, and therefore show up in known sites when Novelty strat is on (which I think is correct) -- Walkers that used old subsetting function with true are now using clearer version that does rederive alleles by default --- .../walkers/phasing/ReadBackedPhasing.java | 2 +- .../GLBasedSampleSelector.java | 2 +- .../GTBasedSampleSelector.java | 2 +- .../gatk/walkers/varianteval/VariantEval.java | 5 ++++- .../evaluators/TiTvVariantEvaluator.java | 9 ++++++--- .../varianteval/stratifications/Novelty.java | 2 +- .../varianteval/util/VariantEvalUtils.java | 12 +++++------- .../utils/variantcontext/VariantContext.java | 18 +++++++++++++++--- .../VariantEvalIntegrationTest.java | 6 +++--- .../VariantContextBenchmark.java | 2 +- 10 files changed, 38 insertions(+), 22 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java index f16deb701..f49e8f8c0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java @@ -288,7 +288,7 @@ public class ReadBackedPhasing extends RodWalker samplesToPhase) { // for ( String sample : samplesToPhase ) // logger.debug(String.format(" Sample %s has genotype %s, het = %s", sample, vc.getGenotype(sample), vc.getGenotype(sample).isHet() )); - VariantContext subvc = vc.subContextFromSamples(samplesToPhase, true); + VariantContext subvc = vc.subContextFromSamples(samplesToPhase); // logger.debug("original VC = " + vc); // logger.debug("sub VC = " + subvc); return VariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java index e54dc6388..3e48520a7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java @@ -43,7 +43,7 @@ public class GLBasedSampleSelector extends SampleSelector { return true; // want to include a site in the given samples if it is *likely* to be variant (via the EXACT model) // first subset to the samples - VariantContext subContext = vc.subContextFromSamples(samples, true); + VariantContext subContext = vc.subContextFromSamples(samples); // now check to see (using EXACT model) whether this should be variant // do we want to apply a prior? maybe user-spec? diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GTBasedSampleSelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GTBasedSampleSelector.java index 0f55524a6..de832b108 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GTBasedSampleSelector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GTBasedSampleSelector.java @@ -45,7 +45,7 @@ public class GTBasedSampleSelector extends SampleSelector{ if ( samples == null || samples.isEmpty() ) return true; - VariantContext subContext = vc.subContextFromSamples(samples, false); + VariantContext subContext = vc.subContextFromSamples(samples); if ( subContext.isPolymorphicInSamples() ) { return true; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java index 0b395bc62..58cd14737 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java @@ -500,7 +500,10 @@ public class VariantEval extends RodWalker implements TreeRedu @Requires({"eval != null", "comp != null"}) private EvalCompMatchType doEvalAndCompMatch(final VariantContext eval, final VariantContext comp, boolean requireStrictAlleleMatch) { - // find all of the matching comps + if ( comp.getType() == VariantContext.Type.NO_VARIATION || eval.getType() == VariantContext.Type.NO_VARIATION ) + // if either of these are NO_VARIATION they are LENIENT matches + return EvalCompMatchType.LENIENT; + if ( comp.getType() != eval.getType() ) return EvalCompMatchType.NO_MATCH; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java index 6c4fcd26d..fe2437976 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java @@ -57,9 +57,12 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv } } - public void update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if (vc1 != null) updateTiTv(vc1, false); - if (vc2 != null) updateTiTv(vc2, true); + @Override + public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if (eval != null) + updateTiTv(eval, false); + if (comp != null) + updateTiTv(comp, true); } @Override diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java index 693bdf198..2ad08d806 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java @@ -28,7 +28,7 @@ public class Novelty extends VariantStratifier implements StandardStratification final Collection knownComps = tracker.getValues(knowns, ref.getLocus()); for ( final VariantContext c : knownComps ) { // loop over sites, looking for something that matches the type eval - if ( eval.getType() == c.getType() ) { + if ( eval.getType() == c.getType() || eval.getType() == VariantContext.Type.NO_VARIATION ) { return KNOWN_STATES; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index 3dcc1f85f..e84b0b10e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -197,7 +197,9 @@ public class VariantEvalUtils { * @return a new VariantContext with just the requested samples */ public VariantContext getSubsetOfVariantContext(VariantContext vc, Set sampleNames) { - return ensureAnnotations(vc, vc.subContextFromSamples(sampleNames, false)); + // if we want to preserve AC0 sites as polymorphic we need to not rederive alleles + final boolean deriveAlleles = variantEvalWalker.ignoreAC0Sites(); + return ensureAnnotations(vc, vc.subContextFromSamples(sampleNames, deriveAlleles)); } public VariantContext ensureAnnotations(final VariantContext vc, final VariantContext vcsub) { @@ -262,12 +264,8 @@ public class VariantEvalUtils { // First, filter the VariantContext to represent only the samples for evaluation VariantContext vcsub = vc; - if (subsetBySample && vc.hasGenotypes()) { - if ( variantEvalWalker.isSubsettingToSpecificSamples() ) - vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation()); - else - vcsub = ensureAnnotations(vc, vc); - } + if (subsetBySample && vc.hasGenotypes()) + vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation()); if ((byFilter || !vcsub.isFiltered())) { addMapping(mapping, VariantEval.getAllSampleName(), vcsub); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 979400350..2211cfe5e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -334,12 +334,14 @@ public class VariantContext implements Feature { // to enable tribble integratio * in this VC is returned as the set of alleles in the subContext, even if * some of those alleles aren't in the samples * + * WARNING: BE CAREFUL WITH rederiveAllelesFromGenotypes UNLESS YOU KNOW WHAT YOU ARE DOING? + * * @param sampleNames the sample names - * @param rederiveAllelesFromGenotypes if true, returns the alleles to just those in use by the samples + * @param rederiveAllelesFromGenotypes if true, returns the alleles to just those in use by the samples, true should be default * @return new VariantContext subsetting to just the given samples */ public VariantContext subContextFromSamples(Set sampleNames, final boolean rederiveAllelesFromGenotypes ) { - if ( sampleNames.containsAll(getSampleNames()) ) { + if ( sampleNames.containsAll(getSampleNames()) && ! rederiveAllelesFromGenotypes ) { return this; // fast path when you don't have any work to do } else { VariantContextBuilder builder = new VariantContextBuilder(this); @@ -355,8 +357,18 @@ public class VariantContext implements Feature { // to enable tribble integratio } } + /** + * @see #subContextFromSamples(java.util.Set, boolean) with rederiveAllelesFromGenotypes = true + * + * @param sampleNames + * @return + */ + public VariantContext subContextFromSamples(final Set sampleNames) { + return subContextFromSamples(sampleNames, true); + } + public VariantContext subContextFromSample(String sampleName) { - return subContextFromSamples(Collections.singleton(sampleName), true); + return subContextFromSamples(Collections.singleton(sampleName)); } /** diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index d9a91c4c2..94e52c2b9 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -34,7 +34,7 @@ import java.util.Arrays; import java.util.List; public class VariantEvalIntegrationTest extends WalkerTest { - private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval/"; + private static String variantEvalTestDataRoot = privateTestDir + "VariantEval/"; private static String fundamentalTestVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf"; private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.vcf"; private static String fundamentalTestSNPsWithMLEVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.withMLE.vcf"; @@ -122,7 +122,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("e62a3bd9914d48e2bb2fb4f5dfc5ebc0") + Arrays.asList("40abbc9be663aed8ee1158f832463ca8") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec); } @@ -144,7 +144,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("087a2d9943c53e7f49663667c3305c7e") + Arrays.asList("106a0e8753e839c0a2c030eb4b165fa9") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec); } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java index 7c522eadf..0e5522e3a 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java @@ -152,7 +152,7 @@ public class VariantContextBenchmark extends SimpleBenchmark { public void run(final VariantContext vc) { if ( samples == null ) samples = new HashSet(new ArrayList(vc.getSampleNames()).subList(0, nSamplesToTake)); - VariantContext sub = vc.subContextFromSamples(samples, true); + VariantContext sub = vc.subContextFromSamples(samples); sub.getNSamples(); } }; From c3c3d18611f06e2831d3ed39263b44ed106f0df1 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 1 Aug 2012 17:09:22 -0400 Subject: [PATCH 050/176] Update BCF2 to put PASS as offset 0 not at the end -- Unfortunately this commit breaks backward compatibility with all existing BCF2 files... --- .../sting/utils/codecs/bcf2/BCF2Decoder.java | 2 +- .../sting/utils/codecs/bcf2/BCF2Utils.java | 10 +++------- .../utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java | 2 +- .../sting/utils/codecs/vcf/VCFIntegrationTest.java | 2 +- 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java index a13be21c5..2619a4dae 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java @@ -202,7 +202,7 @@ public final class BCF2Decoder { return null; else { final String s = new String(bytes, 0, goodLength); - return BCF2Utils.isCollapsedString(s) ? BCF2Utils.exploreStringList(s) : s; + return BCF2Utils.isCollapsedString(s) ? BCF2Utils.explodeStringList(s) : s; } } catch ( IOException e ) { throw new ReviewedStingException("readByte failure", e); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java index 43e933948..1beacd070 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java @@ -79,23 +79,19 @@ public final class BCF2Utils { final Set seen = new HashSet(); final ArrayList dict = new ArrayList(); - boolean sawPASS = false; + dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field + // set up the strings dictionary for ( VCFHeaderLine line : header.getMetaDataInInputOrder() ) { if ( line instanceof VCFIDHeaderLine && ! (line instanceof VCFContigHeaderLine) ) { final VCFIDHeaderLine idLine = (VCFIDHeaderLine)line; if ( ! seen.contains(idLine.getID())) { - sawPASS = sawPASS || idLine.getID().equals(VCFConstants.PASSES_FILTERS_v4); dict.add(idLine.getID()); seen.add(idLine.getID()); } } } - - if ( ! sawPASS ) - dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field - return dict; } @@ -177,7 +173,7 @@ public final class BCF2Utils { */ @Requires({"collapsed != null", "isCollapsedString(collapsed)"}) @Ensures("result != null") - public static final List exploreStringList(final String collapsed) { + public static final List explodeStringList(final String collapsed) { assert isCollapsedString(collapsed); final String[] exploded = collapsed.substring(1).split(","); return Arrays.asList(exploded); diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java index a0feef186..7569ce90d 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java @@ -351,7 +351,7 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest { public void testEncodingListOfString(List strings, String expected) throws IOException { final String collapsed = BCF2Utils.collapseStringList(strings); Assert.assertEquals(collapsed, expected); - Assert.assertEquals(BCF2Utils.exploreStringList(collapsed), strings); + Assert.assertEquals(BCF2Utils.explodeStringList(collapsed), strings); } // ----------------------------------------------------------------- diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java index 3948ba971..86c8c968d 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java @@ -77,7 +77,7 @@ public class VCFIntegrationTest extends WalkerTest { String testVCF = privateTestDir + "ex2.vcf"; String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s "; String test1 = baseCommand + "-T SelectVariants -V " + testVCF; - WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("a04a0fc22fedb516c663e56e51fc1e27")); + WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("e8f721ce81e4fdadba13c5291027057f")); executeTest("Test writing samtools WEx BCF example", spec1); } From e3f89fb0549c8f2c751338ffeeda76678c4d94c4 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 2 Aug 2012 11:33:21 -0400 Subject: [PATCH 052/176] Missing/malformed GATK report files are user errors --- .../src/org/broadinstitute/sting/gatk/report/GATKReport.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java index bec1ea543..47bc48f81 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java @@ -89,9 +89,9 @@ public class GATKReport { reader = new BufferedReader(new FileReader(file)); reportHeader = reader.readLine(); } catch (FileNotFoundException e) { - throw new ReviewedStingException("Could not open file : " + file); + throw new UserException.CouldNotReadInputFile(file, "it does not exist"); } catch (IOException e) { - throw new ReviewedStingException("Could not read file : " + file); + throw new UserException.CouldNotReadInputFile(file, e); } From fb5dabce18bbcd06ce60392ff1380faec50d8c08 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 2 Aug 2012 16:44:39 -0400 Subject: [PATCH 053/176] Update BCF2 to include a minor version number so we can rev (and report errors) with BCF2 -- We are no likely to fail with an error when reading old BCF files, rather than just giving bad results -- Added new class BCFVersion that consolidates all of the version management of BCF --- .../sting/utils/codecs/bcf2/BCF2Codec.java | 22 ++++- .../sting/utils/codecs/bcf2/BCF2Utils.java | 44 +++++----- .../sting/utils/codecs/bcf2/BCFVersion.java | 80 +++++++++++++++++++ .../variantcontext/writer/BCF2Writer.java | 6 +- .../utils/codecs/vcf/VCFIntegrationTest.java | 2 +- 5 files changed, 122 insertions(+), 32 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCFVersion.java diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java index 0f9cc34e7..0776b3aa8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java @@ -53,6 +53,11 @@ public final class BCF2Codec implements FeatureCodec { final protected static Logger logger = Logger.getLogger(BCF2Codec.class); private final static boolean FORBID_SYMBOLICS = false; + private final static int ALLOWED_MAJOR_VERSION = 2; + private final static int MIN_MINOR_VERSION = 1; + + private BCFVersion bcfVersion = null; + private VCFHeader header = null; /** @@ -131,8 +136,16 @@ public final class BCF2Codec implements FeatureCodec { public FeatureCodecHeader readHeader( final PositionalBufferedStream inputStream ) { try { // note that this reads the magic as well, and so does double duty - if ( ! BCF2Utils.startsWithBCF2Magic(inputStream) ) - error("Input stream does not begin with BCF2 magic"); + bcfVersion = BCFVersion.readBCFVersion(inputStream); + if ( bcfVersion == null ) + error("Input stream does not contain a BCF encoded file; BCF magic header info not found"); + + if ( bcfVersion.getMajorVersion() != ALLOWED_MAJOR_VERSION ) + error("BCF2Codec can only process BCF2 files, this file has major version " + bcfVersion.getMajorVersion()); + if ( bcfVersion.getMinorVersion() < MIN_MINOR_VERSION ) + error("BCF2Codec can only process BCF2 files with minor version >= " + MIN_MINOR_VERSION + " but this file has minor version " + bcfVersion.getMinorVersion()); + + logger.info("BCF version " + bcfVersion); final int headerSizeInBytes = BCF2Utils.readInt(BCF2Type.INT32.getSizeInBytes(), inputStream); @@ -187,7 +200,8 @@ public final class BCF2Codec implements FeatureCodec { FileInputStream fis = null; try { fis = new FileInputStream(path); - return BCF2Utils.startsWithBCF2Magic(fis); + final BCFVersion version = BCFVersion.readBCFVersion(fis); + return version != null && version.getMajorVersion() == ALLOWED_MAJOR_VERSION; } catch ( FileNotFoundException e ) { return false; } catch ( IOException e ) { @@ -196,7 +210,7 @@ public final class BCF2Codec implements FeatureCodec { try { if ( fis != null ) fis.close(); } catch ( IOException e ) { - ; // do nothing + // do nothing } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java index 1beacd070..d3f3fc102 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java @@ -41,8 +41,6 @@ import java.util.*; * @since 5/12 */ public final class BCF2Utils { - public static final byte[] MAGIC_HEADER_LINE = "BCF\2".getBytes(); - public static final int MAX_ALLELES_IN_GENOTYPES = 127; public static final int OVERFLOW_ELEMENT_MARKER = 15; @@ -75,7 +73,7 @@ public final class BCF2Utils { */ @Requires("header != null") @Ensures({"result != null", "new HashSet(result).size() == result.size()"}) - public final static ArrayList makeDictionary(final VCFHeader header) { + public static ArrayList makeDictionary(final VCFHeader header) { final Set seen = new HashSet(); final ArrayList dict = new ArrayList(); @@ -96,43 +94,37 @@ public final class BCF2Utils { } @Requires({"nElements >= 0", "type != null"}) - public final static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) { + public static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) { int encodeSize = Math.min(nElements, OVERFLOW_ELEMENT_MARKER); byte typeByte = (byte)((0x0F & encodeSize) << 4 | (type.getID() & 0x0F)); return typeByte; } @Ensures("result >= 0") - public final static int decodeSize(final byte typeDescriptor) { + public static int decodeSize(final byte typeDescriptor) { return (0xF0 & typeDescriptor) >> 4; } @Ensures("result >= 0") - public final static int decodeTypeID(final byte typeDescriptor) { + public static int decodeTypeID(final byte typeDescriptor) { return typeDescriptor & 0x0F; } @Ensures("result != null") - public final static BCF2Type decodeType(final byte typeDescriptor) { + public static BCF2Type decodeType(final byte typeDescriptor) { return ID_TO_ENUM[decodeTypeID(typeDescriptor)]; } - public final static boolean sizeIsOverflow(final byte typeDescriptor) { + public static boolean sizeIsOverflow(final byte typeDescriptor) { return decodeSize(typeDescriptor) == OVERFLOW_ELEMENT_MARKER; } @Requires("nElements >= 0") - public final static boolean willOverflow(final long nElements) { + public static boolean willOverflow(final long nElements) { return nElements > MAX_INLINE_ELEMENTS; } - public final static boolean startsWithBCF2Magic(final InputStream stream) throws IOException { - final byte[] magicBytes = new byte[BCF2Utils.MAGIC_HEADER_LINE.length]; - stream.read(magicBytes); - return Arrays.equals(magicBytes, BCF2Utils.MAGIC_HEADER_LINE); - } - - public final static byte readByte(final InputStream stream) { + public static byte readByte(final InputStream stream) { // TODO -- shouldn't be capturing error here try { return (byte)(stream.read() & 0xFF); @@ -151,7 +143,7 @@ public final class BCF2Utils { */ @Requires({"strings != null", "strings.size() > 1"}) @Ensures("result != null") - public static final String collapseStringList(final List strings) { + public static String collapseStringList(final List strings) { final StringBuilder b = new StringBuilder(); for ( final String s : strings ) { if ( s != null ) { @@ -173,14 +165,14 @@ public final class BCF2Utils { */ @Requires({"collapsed != null", "isCollapsedString(collapsed)"}) @Ensures("result != null") - public static final List explodeStringList(final String collapsed) { + public static List explodeStringList(final String collapsed) { assert isCollapsedString(collapsed); final String[] exploded = collapsed.substring(1).split(","); return Arrays.asList(exploded); } @Requires("s != null") - public static final boolean isCollapsedString(final String s) { + public static boolean isCollapsedString(final String s) { return s.charAt(0) == ','; } @@ -222,7 +214,7 @@ public final class BCF2Utils { } @Ensures("result.isIntegerType()") - public final static BCF2Type determineIntegerType(final int value) { + public static BCF2Type determineIntegerType(final int value) { for ( final BCF2Type potentialType : INTEGER_TYPES_BY_SIZE) { if ( potentialType.withinRange(value) ) return potentialType; @@ -232,7 +224,7 @@ public final class BCF2Utils { } @Ensures("result.isIntegerType()") - public final static BCF2Type determineIntegerType(final int[] values) { + public static BCF2Type determineIntegerType(final int[] values) { // literally a copy of the code below, but there's no general way to unify lists and arrays in java BCF2Type maxType = BCF2Type.INT8; for ( final int value : values ) { @@ -258,7 +250,7 @@ public final class BCF2Utils { */ @Requires({"t1.isIntegerType()","t2.isIntegerType()"}) @Ensures("result.isIntegerType()") - public final static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) { + public static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) { switch ( t1 ) { case INT8: return t2; case INT16: return t2 == BCF2Type.INT32 ? t2 : t1; @@ -268,7 +260,7 @@ public final class BCF2Utils { } @Ensures("result.isIntegerType()") - public final static BCF2Type determineIntegerType(final List values) { + public static BCF2Type determineIntegerType(final List values) { BCF2Type maxType = BCF2Type.INT8; for ( final int value : values ) { final BCF2Type type1 = determineIntegerType(value); @@ -293,7 +285,7 @@ public final class BCF2Utils { * @param o * @return */ - public final static List toList(final Object o) { + public static List toList(final Object o) { if ( o == null ) return Collections.emptyList(); else if ( o instanceof List ) return (List)o; else return Collections.singletonList(o); @@ -301,7 +293,7 @@ public final class BCF2Utils { @Requires({"stream != null", "bytesForEachInt > 0"}) - public final static int readInt(int bytesForEachInt, final InputStream stream) { + public static int readInt(int bytesForEachInt, final InputStream stream) { switch ( bytesForEachInt ) { case 1: { return (byte)(readByte(stream)); @@ -319,7 +311,7 @@ public final class BCF2Utils { } } - public final static void encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException { + public static void encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException { switch ( type.getSizeInBytes() ) { case 1: encodeStream.write(0xFF & value); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCFVersion.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCFVersion.java new file mode 100644 index 000000000..742da7c0c --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCFVersion.java @@ -0,0 +1,80 @@ +package org.broadinstitute.sting.utils.codecs.bcf2; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Arrays; + +/** + * Simple holder for BCF version information + * + * User: depristo + * Date: 8/2/12 + * Time: 2:16 PM + */ +public class BCFVersion { + /** + * BCF2 begins with the MAGIC info BCF_M_m where M is the major version (currently 2) + * and m is the minor version, currently 1 + */ + public static final byte[] MAGIC_HEADER_START = "BCF".getBytes(); + + final int majorVersion; + final int minorVersion; + + public BCFVersion(int majorVersion, int minorVersion) { + this.majorVersion = majorVersion; + this.minorVersion = minorVersion; + } + + /** + * @return the major version number of this BCF file + */ + public int getMajorVersion() { + return majorVersion; + } + + /** + * @return the minor version number of this BCF file + */ + public int getMinorVersion() { + return minorVersion; + } + + /** + * Return a new BCFVersion object describing the major and minor version of the BCF file in stream + * + * Note that stream must be at the very start of the file. + * + * @param stream + * @return a BCFVersion object, or null if stream doesn't contain a BCF file + * @throws IOException + */ + public static BCFVersion readBCFVersion(final InputStream stream) throws IOException { + final byte[] magicBytes = new byte[MAGIC_HEADER_START.length]; + stream.read(magicBytes); + if ( Arrays.equals(magicBytes, MAGIC_HEADER_START) ) { + // we're a BCF file + final int majorByte = stream.read(); + final int minorByte = stream.read(); + return new BCFVersion( majorByte, minorByte ); + } else + return null; + } + + /** + * Write out the BCF magic information indicating this is a BCF file with corresponding major and minor versions + * @param out + * @throws IOException + */ + public void write(final OutputStream out) throws IOException { + out.write(MAGIC_HEADER_START); + out.write(getMajorVersion() & 0xFF); + out.write(getMinorVersion() & 0xFF); + } + + @Override + public String toString() { + return String.format("BCF%d.%d", getMajorVersion(), getMinorVersion()); + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java index b5da206ad..32377d09e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java @@ -31,6 +31,7 @@ import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec; import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type; import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils; +import org.broadinstitute.sting.utils.codecs.bcf2.BCFVersion; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -83,6 +84,9 @@ import java.util.*; * @since 06/12 */ class BCF2Writer extends IndexingVariantContextWriter { + public static final int MAJOR_VERSION = 2; + public static final int MINOR_VERSION = 1; + /** * If true, we will write out the undecoded raw bytes for a genotypes block, if it * is found in the input VC. This can be very dangerous as the genotype encoding @@ -153,7 +157,7 @@ class BCF2Writer extends IndexingVariantContextWriter { writer.close(); final byte[] headerBytes = capture.toByteArray(); - outputStream.write(BCF2Utils.MAGIC_HEADER_LINE); + new BCFVersion(MAJOR_VERSION, MINOR_VERSION).write(outputStream); BCF2Utils.encodeRawBytes(headerBytes.length, BCF2Type.INT32, outputStream); outputStream.write(headerBytes); } catch (IOException e) { diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java index 86c8c968d..71fc1d464 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java @@ -57,7 +57,7 @@ public class VCFIntegrationTest extends WalkerTest { String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s "; String test1 = baseCommand + "-T SelectVariants -V " + testVCF; - WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("")); + WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("bdab26dd7648a806dbab01f64db2bdab")); executeTest("Test reading and writing 1000G Phase I SVs", spec1); } From e04989f76d3b2771f83cb33900c31a5518503e36 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 3 Aug 2012 09:42:21 -0400 Subject: [PATCH 055/176] Bugfix for new PASS position in dictionary in BCF2 --- .../org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java index d3f3fc102..c79abe2ae 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java @@ -77,7 +77,9 @@ public final class BCF2Utils { final Set seen = new HashSet(); final ArrayList dict = new ArrayList(); - dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field + // special case the special PASS field which doesn't show up in the FILTER field definitions + seen.add(VCFConstants.PASSES_FILTERS_v4); + dict.add(VCFConstants.PASSES_FILTERS_v4); // set up the strings dictionary for ( VCFHeaderLine line : header.getMetaDataInInputOrder() ) { From 524d7ea306d18a687d9dd3ba2454746bab63a936 Mon Sep 17 00:00:00 2001 From: Joel Thibault Date: Tue, 31 Jul 2012 15:09:36 -0400 Subject: [PATCH 056/176] Choose whether to keep program records based on Walker * Add keepProgramRecords argument * Make removeProgramRecords / keepProgramRecords override default --- .../sting/gatk/GenomeAnalysisEngine.java | 11 ++++++++++- .../sting/gatk/arguments/GATKArgumentCollection.java | 5 ++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 228f8351a..967fd3236 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -797,6 +797,15 @@ public class GenomeAnalysisEngine { if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF) throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested."); + if (argCollection.removeProgramRecords && argCollection.keepProgramRecords) + throw new UserException.BadArgumentValue("rpr / kpr", "Cannot enable both options"); + + // LocusWalkers don't use program records, so remove them by default to save memory + boolean removeProgramRecords = (this.walker instanceof LocusWalker) || argCollection.removeProgramRecords; + + if (argCollection.keepProgramRecords) + removeProgramRecords = false; + return new SAMDataSource( samReaderIDs, threadAllocation, @@ -814,7 +823,7 @@ public class GenomeAnalysisEngine { refReader, getBaseRecalibration(), argCollection.defaultBaseQualities, - argCollection.removeProgramRecords); + removeProgramRecords); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 91a008cbc..972116952 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -249,9 +249,12 @@ public class GATKArgumentCollection { @Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false) public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT; - @Argument(fullName = "remove_program_records", shortName = "rpr", doc = "Should we remove program records from the SAM header", required = false) + @Argument(fullName = "remove_program_records", shortName = "rpr", doc = "Should we override the Walker's default and remove program records from the SAM header", required = false) public boolean removeProgramRecords = false; + @Argument(fullName = "keep_program_records", shortName = "kpr", doc = "Should we override the Walker's default and keep program records from the SAM header", required = false) + public boolean keepProgramRecords = false; + @Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument.", required = false) public ValidationExclusion.TYPE unsafe; From addbfd6437a47c1f3b9f9c6ffd51184561ed08dc Mon Sep 17 00:00:00 2001 From: Joel Thibault Date: Thu, 2 Aug 2012 14:21:30 -0400 Subject: [PATCH 057/176] Add a RemoveProgramRecords annotation * Add the RemoveProgramRecords annotation to LocusWalker --- .../sting/gatk/GenomeAnalysisEngine.java | 3 +-- .../sting/gatk/walkers/LocusWalker.java | 1 + .../gatk/walkers/RemoveProgramRecords.java | 21 +++++++++++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/RemoveProgramRecords.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 967fd3236..56fcf0652 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -800,8 +800,7 @@ public class GenomeAnalysisEngine { if (argCollection.removeProgramRecords && argCollection.keepProgramRecords) throw new UserException.BadArgumentValue("rpr / kpr", "Cannot enable both options"); - // LocusWalkers don't use program records, so remove them by default to save memory - boolean removeProgramRecords = (this.walker instanceof LocusWalker) || argCollection.removeProgramRecords; + boolean removeProgramRecords = argCollection.removeProgramRecords || walker.getClass().isAnnotationPresent(RemoveProgramRecords.class); if (argCollection.keepProgramRecords) removeProgramRecords = false; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java index e94d01d5a..2a92d8831 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java @@ -19,6 +19,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES}) @PartitionBy(PartitionType.LOCUS) @ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class}) +@RemoveProgramRecords public abstract class LocusWalker extends Walker { // Do we actually want to operate on the context? public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/RemoveProgramRecords.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/RemoveProgramRecords.java new file mode 100644 index 000000000..d9abc7925 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/RemoveProgramRecords.java @@ -0,0 +1,21 @@ +package org.broadinstitute.sting.gatk.walkers; + +/** + * Created with IntelliJ IDEA. + * User: thibault + * Date: 8/2/12 + * Time: 1:58 PM + * To change this template use File | Settings | File Templates. + */ + +import java.lang.annotation.*; + +/** + * Indicates that program records should be removed from SAM headers by default for this walker + */ +@Documented +@Inherited +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.TYPE) +public @interface RemoveProgramRecords { +} From 51bd03cc36cd709b0a5a070246729858bba5bc54 Mon Sep 17 00:00:00 2001 From: Joel Thibault Date: Thu, 2 Aug 2012 14:46:00 -0400 Subject: [PATCH 058/176] Add RemoveProgramRecords annotation to ActiveRegionWalker --- .../broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java | 1 + 1 file changed, 1 insertion(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java index b2975cbbf..aba508b3e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java @@ -32,6 +32,7 @@ import java.util.List; @PartitionBy(PartitionType.READ) @ActiveRegionExtension(extension=50,maxRegion=1500) @ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class}) +@RemoveProgramRecords public abstract class ActiveRegionWalker extends Walker { @Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false) From 9e25b209e0ef10cc63dd17f2abf9e61dfbf1b47e Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Fri, 3 Aug 2012 12:24:23 -0400 Subject: [PATCH 059/176] First pass of implementation of Reduced Reads with HaplotypeCaller. Main changes: a) Active region: scale PL's by representative count to determine whether region is active. b) Scale per-read, per-haplotype likelihoods by read representative counts. A read representative count is (temporarily) defined as the average representative count over all bases in read, TBD whether this is good enough to avoid biases in GL's. c) DeBruijn assembler inserts kmers N times in graph, where N is min representative count of read over kmer span - TBD again whether this is the best approach. d) Bug fixes in FragmentUtils: logic to merge fragments was wrong in cases where there is discrepancy of overlaps between unclipped/soft clipped bases. Didn't affect things before but RR makes prevalence of hard-clipped bases in CIGARs more prevalent so this was exposed. e) Cache read representative counts along with read likelihoods associated with a Haplotype. Code can/should be cleaned up and unified with PairHMMIndelErrorModelCode, as well as refactored to support arbitrary ploidy in HaplotypeCaller --- .../haplotypecaller/HaplotypeCaller.java | 6 +-- .../LikelihoodCalculationEngine.java | 24 ++++++++--- .../SimpleDeBruijnAssembler.java | 17 ++++++-- .../SimpleDeBruijnAssemblerUnitTest.java | 40 +++++++++++++++++++ .../broadinstitute/sting/utils/Haplotype.java | 14 ++++++- .../broadinstitute/sting/utils/MathUtils.java | 7 ++++ .../sting/utils/fragments/FragmentUtils.java | 23 ++++++++++- 7 files changed, 115 insertions(+), 16 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 1130feaea..8acab3e3c 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -356,9 +356,9 @@ public class HaplotypeCaller extends ActiveRegionWalker implem } } } - genotypeLikelihoods[AA] += QualityUtils.qualToProbLog10(qual); - genotypeLikelihoods[AB] += MathUtils.approximateLog10SumLog10( QualityUtils.qualToProbLog10(qual) + LOG_ONE_HALF, QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD + LOG_ONE_HALF ); - genotypeLikelihoods[BB] += QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD; + genotypeLikelihoods[AA] += p.getRepresentativeCount()* QualityUtils.qualToProbLog10(qual); + genotypeLikelihoods[AB] += p.getRepresentativeCount()* MathUtils.approximateLog10SumLog10( QualityUtils.qualToProbLog10(qual) + LOG_ONE_HALF, QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD + LOG_ONE_HALF ); + genotypeLikelihoods[BB] += p.getRepresentativeCount()* QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD; } } genotypes.add( new GenotypeBuilder(sample).alleles(noCall).PL(genotypeLikelihoods).make() ); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index 365459882..a3179681e 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -50,7 +50,6 @@ public class LikelihoodCalculationEngine { } public void computeReadLikelihoods( final ArrayList haplotypes, final HashMap> perSampleReadList ) { - final int numHaplotypes = haplotypes.size(); int X_METRIC_LENGTH = 0; for( final String sample : perSampleReadList.keySet() ) { @@ -60,8 +59,8 @@ public class LikelihoodCalculationEngine { } } int Y_METRIC_LENGTH = 0; - for( int jjj = 0; jjj < numHaplotypes; jjj++ ) { - final int haplotypeLength = haplotypes.get(jjj).getBases().length; + for( Haplotype h: haplotypes ) { + final int haplotypeLength = h.getBases().length; if( haplotypeLength > Y_METRIC_LENGTH ) { Y_METRIC_LENGTH = haplotypeLength; } } @@ -90,8 +89,10 @@ public class LikelihoodCalculationEngine { final int numHaplotypes = haplotypes.size(); final int numReads = reads.size(); final double[][] readLikelihoods = new double[numHaplotypes][numReads]; + final int[][] readCounts = new int[numHaplotypes][numReads]; for( int iii = 0; iii < numReads; iii++ ) { final GATKSAMRecord read = reads.get(iii); + final int readCount = getRepresentativeReadCount(read); final byte[] overallGCP = new byte[read.getReadLength()]; Arrays.fill( overallGCP, constantGCP ); // Is there a way to derive empirical estimates for this from the data? @@ -114,13 +115,23 @@ public class LikelihoodCalculationEngine { readLikelihoods[jjj][iii] = pairHMM.computeReadLikelihoodGivenHaplotype(haplotype.getBases(), read.getReadBases(), readQuals, readInsQuals, readDelQuals, overallGCP, haplotypeStart, matchMetricArray, XMetricArray, YMetricArray); + readCounts[jjj][iii] = readCount; } } for( int jjj = 0; jjj < numHaplotypes; jjj++ ) { - haplotypes.get(jjj).addReadLikelihoods( sample, readLikelihoods[jjj] ); + haplotypes.get(jjj).addReadLikelihoods( sample, readLikelihoods[jjj], readCounts[jjj] ); } } + private static int getRepresentativeReadCount(GATKSAMRecord read) { + if (!read.isReducedRead()) + return 1; + + // compute mean representative read counts + final byte[] counts = read.getReducedReadCounts(); + return MathUtils.sum(counts)/counts.length; + } + private static int computeFirstDifferingPosition( final byte[] b1, final byte[] b2 ) { for( int iii = 0; iii < b1.length && iii < b2.length; iii++ ){ if( b1[iii] != b2[iii] ) { @@ -154,17 +165,20 @@ public class LikelihoodCalculationEngine { } // compute the diploid haplotype likelihoods + // todo - needs to be generalized to arbitrary ploidy, cleaned and merged with PairHMMIndelErrorModel code for( int iii = 0; iii < numHaplotypes; iii++ ) { for( int jjj = 0; jjj <= iii; jjj++ ) { for( final Haplotype iii_mapped : haplotypeMapping.get(iii) ) { final double[] readLikelihoods_iii = iii_mapped.getReadLikelihoods(sample); + final int[] readCounts_iii = iii_mapped.getReadCounts(sample); for( final Haplotype jjj_mapped : haplotypeMapping.get(jjj) ) { final double[] readLikelihoods_jjj = jjj_mapped.getReadLikelihoods(sample); double haplotypeLikelihood = 0.0; for( int kkk = 0; kkk < readLikelihoods_iii.length; kkk++ ) { // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2) + // log10(10^(a*x1) + 10^(b*x2)) // First term is approximated by Jacobian log with table lookup. - haplotypeLikelihood += MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF; + haplotypeLikelihood +=readCounts_iii[kkk] *( MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF); } haplotypeLikelihoodMatrix[iii][jjj] = Math.max(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // MathUtils.approximateLog10SumLog10(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // BUGBUG: max or sum? } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java index e2bc7a10f..f3dd3babb 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java @@ -4,6 +4,7 @@ import com.google.java.contract.Ensures; import org.apache.commons.lang.ArrayUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Haplotype; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.SWPairwiseAlignment; import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -68,7 +69,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { return findBestPaths( refHaplotype, fullReferenceWithPadding, refLoc, activeAllelesToGenotype, activeRegion.getExtendedLoc() ); } - private void createDeBruijnGraphs( final ArrayList reads, final Haplotype refHaplotype ) { + protected void createDeBruijnGraphs( final List reads, final Haplotype refHaplotype ) { graphs.clear(); // create the graph @@ -161,7 +162,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { } } - private static boolean createGraphFromSequences( final DefaultDirectedGraph graph, final ArrayList reads, final int KMER_LENGTH, final Haplotype refHaplotype, final boolean DEBUG ) { + private static boolean createGraphFromSequences( final DefaultDirectedGraph graph, final Collection reads, final int KMER_LENGTH, final Haplotype refHaplotype, final boolean DEBUG ) { final byte[] refSequence = refHaplotype.getBases(); if( refSequence.length >= KMER_LENGTH + KMER_OVERLAP ) { final int kmersInSequence = refSequence.length - KMER_LENGTH + 1; @@ -183,6 +184,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { for( final GATKSAMRecord read : reads ) { final byte[] sequence = read.getReadBases(); final byte[] qualities = read.getBaseQualities(); + final byte[] reducedReadCounts = read.getReducedReadCounts(); // will be null if read is not readuced if( sequence.length > KMER_LENGTH + KMER_OVERLAP ) { final int kmersInSequence = sequence.length - KMER_LENGTH + 1; for( int iii = 0; iii < kmersInSequence - 1; iii++ ) { @@ -194,6 +196,12 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { break; } } + int countNumber = 1; + if (read.isReducedRead()) { + // compute min (?) number of reduced read counts in current kmer span + countNumber = MathUtils.arrayMin(Arrays.copyOfRange(reducedReadCounts,iii,iii+KMER_LENGTH+1)); + } + if( !badKmer ) { // get the kmers final byte[] kmer1 = new byte[KMER_LENGTH]; @@ -201,7 +209,8 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { final byte[] kmer2 = new byte[KMER_LENGTH]; System.arraycopy(sequence, iii+1, kmer2, 0, KMER_LENGTH); - addKmersToGraph(graph, kmer1, kmer2, false); + for (int k=0; k < countNumber; k++) + addKmersToGraph(graph, kmer1, kmer2, false); } } } @@ -230,7 +239,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { return true; } - private void printGraphs() { + protected void printGraphs() { int count = 0; for( final DefaultDirectedGraph graph : graphs ) { GRAPH_WRITER.println("digraph kmer" + count++ +" {"); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssemblerUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssemblerUnitTest.java index 4f42d5bc8..a83afdbab 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssemblerUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssemblerUnitTest.java @@ -7,6 +7,8 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; */ import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.walkers.genotyper.ArtificialReadPileupTestProvider; import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.variantcontext.Allele; @@ -18,6 +20,7 @@ import org.testng.annotations.Test; import java.io.File; import java.io.FileNotFoundException; +import java.io.PrintStream; import java.util.*; public class SimpleDeBruijnAssemblerUnitTest extends BaseTest { @@ -143,6 +146,43 @@ public class SimpleDeBruijnAssemblerUnitTest extends BaseTest { Assert.assertTrue(graphEquals(graph, expectedGraph)); } + @Test(enabled=true) + public void testBasicGraphCreation() { + final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref"); + final byte refBase = refPileupTestProvider.getReferenceContext().getBase(); + final String altBase = (refBase==(byte)'A'?"C":"A"); + final int matches = 50; + final int mismatches = 50; + Map refContext = refPileupTestProvider.getAlignmentContextFromAlleles(0, altBase, new int[]{matches, mismatches}, false, 30); + PrintStream graphWriter = null; + + try{ + graphWriter = new PrintStream("du.txt"); + } catch (Exception e) {} + + + SimpleDeBruijnAssembler assembler = new SimpleDeBruijnAssembler(true,graphWriter); + final Haplotype refHaplotype = new Haplotype(refPileupTestProvider.getReferenceContext().getBases()); + refHaplotype.setIsReference(true); + assembler.createDeBruijnGraphs(refContext.get(refPileupTestProvider.getSampleNames().get(0)).getBasePileup().getReads(), refHaplotype); + +/* // clean up the graphs by pruning and merging + for( final DefaultDirectedGraph graph : graphs ) { + SimpleDeBruijnAssembler.pruneGraph( graph, PRUNE_FACTOR ); + //eliminateNonRefPaths( graph ); + SimpleDeBruijnAssembler.mergeNodes( graph ); + } + */ + if( graphWriter != null ) { + assembler.printGraphs(); + } + + int k=2; + + // find the best paths in the graphs + // return findBestPaths( refHaplotype, fullReferenceWithPadding, refLoc, activeAllelesToGenotype, activeRegion.getExtendedLoc() ); + + } @Test(enabled = true) public void testEliminateNonRefPaths() { DefaultDirectedGraph graph = new DefaultDirectedGraph(DeBruijnEdge.class); diff --git a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java index 54442622f..eca2e454e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java @@ -40,6 +40,7 @@ public class Haplotype { protected final double[] quals; private GenomeLoc genomeLocation = null; private HashMap readLikelihoodsPerSample = null; + private HashMap readCountsPerSample = null; private HashMap eventMap = null; private boolean isRef = false; private Cigar cigar; @@ -83,18 +84,27 @@ public class Haplotype { return Arrays.hashCode(bases); } - public void addReadLikelihoods( final String sample, final double[] readLikelihoods ) { + public void addReadLikelihoods( final String sample, final double[] readLikelihoods, final int[] readCounts ) { if( readLikelihoodsPerSample == null ) { readLikelihoodsPerSample = new HashMap(); } readLikelihoodsPerSample.put(sample, readLikelihoods); + if( readCountsPerSample == null ) { + readCountsPerSample = new HashMap(); + } + readCountsPerSample.put(sample, readCounts); } @Ensures({"result != null"}) public double[] getReadLikelihoods( final String sample ) { return readLikelihoodsPerSample.get(sample); } - + + @Ensures({"result != null"}) + public int[] getReadCounts( final String sample ) { + return readCountsPerSample.get(sample); + } + public Set getSampleKeySet() { return readLikelihoodsPerSample.keySet(); } diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index 1f5eaefee..96704f0b8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -210,6 +210,13 @@ public class MathUtils { return total; } + public static int sum(byte[] x) { + int total = 0; + for (byte v : x) + total += (int)v; + return total; + } + /** * Calculates the log10 cumulative sum of an array with log10 probabilities * diff --git a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java index 851272673..2f31c154c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java @@ -134,17 +134,36 @@ public class FragmentUtils { GATKSAMRecord firstRead = overlappingPair.get(0); GATKSAMRecord secondRead = overlappingPair.get(1); - if( !(secondRead.getUnclippedStart() <= firstRead.getUnclippedEnd() && secondRead.getUnclippedStart() >= firstRead.getUnclippedStart() && secondRead.getUnclippedEnd() >= firstRead.getUnclippedEnd()) ) { + /* + System.out.println("read 0 unclipped start:"+overlappingPair.get(0).getUnclippedStart()); + System.out.println("read 0 unclipped end:"+overlappingPair.get(0).getUnclippedEnd()); + System.out.println("read 1 unclipped start:"+overlappingPair.get(1).getUnclippedStart()); + System.out.println("read 1 unclipped end:"+overlappingPair.get(1).getUnclippedEnd()); + System.out.println("read 0 start:"+overlappingPair.get(0).getAlignmentStart()); + System.out.println("read 0 end:"+overlappingPair.get(0).getAlignmentEnd()); + System.out.println("read 1 start:"+overlappingPair.get(1).getAlignmentStart()); + System.out.println("read 1 end:"+overlappingPair.get(1).getAlignmentEnd()); + */ + if( !(secondRead.getSoftStart() <= firstRead.getSoftEnd() && secondRead.getSoftStart() >= firstRead.getSoftStart() && secondRead.getSoftEnd() >= firstRead.getSoftEnd()) ) { firstRead = overlappingPair.get(1); // swap them secondRead = overlappingPair.get(0); } - if( !(secondRead.getUnclippedStart() <= firstRead.getUnclippedEnd() && secondRead.getUnclippedStart() >= firstRead.getUnclippedStart() && secondRead.getUnclippedEnd() >= firstRead.getUnclippedEnd()) ) { + if( !(secondRead.getSoftStart() <= firstRead.getSoftEnd() && secondRead.getSoftStart() >= firstRead.getSoftStart() && secondRead.getSoftEnd() >= firstRead.getSoftEnd()) ) { return overlappingPair; // can't merge them, yet: AAAAAAAAAAA-BBBBBBBBBBB-AAAAAAAAAAAAAA, B is contained entirely inside A } if( firstRead.getCigarString().contains("I") || firstRead.getCigarString().contains("D") || secondRead.getCigarString().contains("I") || secondRead.getCigarString().contains("D") ) { return overlappingPair; // fragments contain indels so don't merge them } +/* // check for inconsistent start positions between uncliped/soft alignment starts + if (secondRead.getAlignmentStart() >= firstRead.getAlignmentStart() && secondRead.getUnclippedStart() < firstRead.getUnclippedStart()) + return overlappingPair; + if (secondRead.getAlignmentStart() <= firstRead.getAlignmentStart() && secondRead.getUnclippedStart() > firstRead.getUnclippedStart()) + return overlappingPair; + + if (secondRead.getUnclippedStart() < firstRead.getAlignmentEnd() && secondRead.getAlignmentStart() >= firstRead.getAlignmentEnd()) + return overlappingPair; + */ final Pair pair = ReadUtils.getReadCoordinateForReferenceCoordinate(firstRead, secondRead.getSoftStart()); final int firstReadStop = ( pair.getSecond() ? pair.getFirst() + 1 : pair.getFirst() ); From ff80f1772160dcfc8225f0423c2025f51d9266c9 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Fri, 3 Aug 2012 13:14:37 -0400 Subject: [PATCH 060/176] Using PathComparatorTotalScore in the assembly graph traversal does a better job of capturing low frequency branches that are inside high frequnecy haplotypes. --- .../walkers/haplotypecaller/HaplotypeCaller.java | 2 +- .../gatk/walkers/haplotypecaller/KBestPaths.java | 16 ++++++++-------- .../LikelihoodCalculationEngine.java | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 1130feaea..6bbfb0391 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -415,7 +415,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem likelihoodCalculationEngine.computeReadLikelihoods( haplotypes, perSampleReadList ); // subset down to only the best haplotypes to be genotyped in all samples ( in GGA mode use all discovered haplotypes ) - final ArrayList bestHaplotypes = haplotypes;// ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? likelihoodCalculationEngine.selectBestHaplotypes( haplotypes ) : haplotypes ); + final ArrayList bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? likelihoodCalculationEngine.selectBestHaplotypes( haplotypes ) : haplotypes ); for( final Pair>> callResult : ( GENOTYPE_FULL_ACTIVE_REGION && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KBestPaths.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KBestPaths.java index 45deb9b2a..0ef1a13a4 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KBestPaths.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/KBestPaths.java @@ -82,11 +82,11 @@ public class KBestPaths { } } - protected static class PathComparatorLowestEdge implements Comparator { - public int compare(final Path path1, final Path path2) { - return path2.lowestEdge - path1.lowestEdge; - } - } + //protected static class PathComparatorLowestEdge implements Comparator { + // public int compare(final Path path1, final Path path2) { + // return path2.lowestEdge - path1.lowestEdge; + // } + //} public static List getKBestPaths( final DefaultDirectedGraph graph, final int k ) { if( k > MAX_PATHS_TO_HOLD/2 ) { throw new ReviewedStingException("Asked for more paths than MAX_PATHS_TO_HOLD!"); } @@ -99,7 +99,7 @@ public class KBestPaths { } } - Collections.sort(bestPaths, new PathComparatorLowestEdge() ); + Collections.sort(bestPaths, new PathComparatorTotalScore() ); Collections.reverse(bestPaths); return bestPaths.subList(0, Math.min(k, bestPaths.size())); } @@ -114,8 +114,8 @@ public class KBestPaths { if ( allOutgoingEdgesHaveBeenVisited(graph, path) ) { if ( bestPaths.size() >= MAX_PATHS_TO_HOLD ) { // clean out some low scoring paths - Collections.sort(bestPaths, new PathComparatorLowestEdge() ); - for(int iii = 0; iii < 20; iii++) { bestPaths.remove(0); } + Collections.sort(bestPaths, new PathComparatorTotalScore() ); + for(int iii = 0; iii < 20; iii++) { bestPaths.remove(0); } // BUGBUG: assumes MAX_PATHS_TO_HOLD >> 20 } bestPaths.add(path); } else if( n.val > 10000) { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index 365459882..f1d0a8a12 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -311,7 +311,7 @@ public class LikelihoodCalculationEngine { int hap1 = 0; int hap2 = 0; //double bestElement = Double.NEGATIVE_INFINITY; - final int maxChosenHaplotypes = Math.min( 9, sampleKeySet.size() * 2 + 1 ); + final int maxChosenHaplotypes = Math.min( 13, sampleKeySet.size() * 2 + 1 ); while( bestHaplotypesIndexList.size() < maxChosenHaplotypes ) { double maxElement = Double.NEGATIVE_INFINITY; for( int iii = 0; iii < numHaplotypes; iii++ ) { From d2e8eb7b23099be33ba6ed2f4545f874734bb2b9 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Fri, 3 Aug 2012 14:26:51 -0400 Subject: [PATCH 061/176] Fixed 2 haplotype caller unit tests: a) new interface for addReadLikelihoods() including read counts, b) disable test that test basic DeBruijn graph assembly, not ready yet --- .../haplotypecaller/LikelihoodCalculationEngineUnitTest.java | 3 ++- .../haplotypecaller/SimpleDeBruijnAssemblerUnitTest.java | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngineUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngineUnitTest.java index 185641140..e82946690 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngineUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngineUnitTest.java @@ -95,9 +95,10 @@ public class LikelihoodCalculationEngineUnitTest extends BaseTest { ArrayList haplotypes = new ArrayList(); for( int iii = 1; iii <= 3; iii++) { Double readLikelihood = ( iii == 1 ? readLikelihoodForHaplotype1 : ( iii == 2 ? readLikelihoodForHaplotype2 : readLikelihoodForHaplotype3) ); + int readCount = 1; if( readLikelihood != null ) { Haplotype haplotype = new Haplotype( (iii == 1 ? "AAAA" : (iii == 2 ? "CCCC" : "TTTT")).getBytes() ); - haplotype.addReadLikelihoods("myTestSample", new double[]{readLikelihood}); + haplotype.addReadLikelihoods("myTestSample", new double[]{readLikelihood}, new int[]{readCount}); haplotypes.add(haplotype); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssemblerUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssemblerUnitTest.java index a83afdbab..5652b118d 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssemblerUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssemblerUnitTest.java @@ -146,7 +146,8 @@ public class SimpleDeBruijnAssemblerUnitTest extends BaseTest { Assert.assertTrue(graphEquals(graph, expectedGraph)); } - @Test(enabled=true) + @Test(enabled=false) +// not ready yet public void testBasicGraphCreation() { final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref"); final byte refBase = refPileupTestProvider.getReferenceContext().getBase(); From e1bba91836ad68e453839b1ca9c5a72d9957c9f0 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 3 Aug 2012 16:01:27 -0400 Subject: [PATCH 062/176] Ready for full-scale evaluation adaptive BQSR contexts -- VisualizeContextTree now can write out an equivalent BQSR table determined after adaptive context merging of all RG x QUAL x CONTEXT trees -- Docs, algorithm descriptions, etc so that it makes sense what's going on -- VisualizeContextTree should really be simplified when into a single tool that just visualize the trees when / if we decide to make adaptive contexts standard part of BQSR -- Misc. cleaning, organization of the code (recalibation tests were in private but corresponding actual files were public) --- .../sting/gatk/report/GATKReportTable.java | 31 ++- .../utils/recalibration/AdaptiveContext.java | 154 +++++++++++++ .../utils/recalibration/RecalDatumNode.java | 217 ++++++++++++++---- .../AdaptiveContextUnitTest.java | 64 ++++++ .../recalibration/QualQuantizerUnitTest.java | 169 ++++++++++++++ .../recalibration/RecalDatumUnitTest.java | 154 +++++++++++++ 6 files changed, 743 insertions(+), 46 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/utils/recalibration/AdaptiveContext.java create mode 100644 public/java/test/org/broadinstitute/sting/utils/recalibration/AdaptiveContextUnitTest.java create mode 100644 public/java/test/org/broadinstitute/sting/utils/recalibration/QualQuantizerUnitTest.java create mode 100644 public/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index 7a272e155..3b4bdd087 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -208,11 +208,23 @@ public class GATKReportTable { } /** - * Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed - * - * @param name the name of the table or column - * @return true if the name is valid, false if otherwise + * Create a new GATKReportTable with the same structure + * @param tableToCopy */ + public GATKReportTable(final GATKReportTable tableToCopy, final boolean copyData) { + this(tableToCopy.getTableName(), tableToCopy.getTableDescription(), tableToCopy.getNumColumns(), tableToCopy.sortByRowID); + for ( final GATKReportColumn column : tableToCopy.getColumnInfo() ) + addColumn(column.getColumnName(), column.getFormat()); + if ( copyData ) + throw new IllegalArgumentException("sorry, copying data in GATKReportTable isn't supported"); + } + + /** + * Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed + * + * @param name the name of the table or column + * @return true if the name is valid, false if otherwise + */ private boolean isValidName(String name) { Pattern p = Pattern.compile(INVALID_TABLE_NAME_REGEX); Matcher m = p.matcher(name); @@ -490,6 +502,17 @@ public class GATKReportTable { return get(rowIdToIndex.get(rowID), columnNameToIndex.get(columnName)); } + /** + * Get a value from the given position in the table + * + * @param rowIndex the row ID + * @param columnName the name of the column + * @return the value stored at the specified position in the table + */ + public Object get(final int rowIndex, final String columnName) { + return get(rowIndex, columnNameToIndex.get(columnName)); + } + /** * Get a value from the given position in the table * diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/AdaptiveContext.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/AdaptiveContext.java new file mode 100644 index 000000000..083b8af64 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/AdaptiveContext.java @@ -0,0 +1,154 @@ +package org.broadinstitute.sting.utils.recalibration; + +import java.util.*; + +/** + * Functions for working with AdaptiveContexts + * + * User: depristo + * Date: 8/3/12 + * Time: 12:21 PM + * To change this template use File | Settings | File Templates. + */ +public class AdaptiveContext { + private AdaptiveContext() {} + + /** + * Return a freshly allocated tree filled in completely to fillDepth with + * all combinations of {A,C,G,T}^filldepth contexts. For nodes + * in the tree, they are simply copied. When the algorithm needs to + * generate new nodes (because they are missing) the subnodes inherit the + * observation and error counts of their parent. + * + * This algorithm produces data consistent with the standard output in a BQSR recal + * file for the Context covariate + * + * @param root + * @param fillDepth + * @return + */ + public static RecalDatumNode fillToDepth(final RecalDatumNode root, final int fillDepth) { + if ( root == null ) throw new IllegalArgumentException("root is null"); + if ( fillDepth < 0 ) throw new IllegalArgumentException("fillDepth is < 0"); + + return fillToDepthRec(root, fillDepth, 0); + } + + private static RecalDatumNode fillToDepthRec(final RecalDatumNode parent, + final int fillDepth, + final int currentDepth) { + // three cases: + // We are in the tree and so just recursively build + // We have reached our depth goal, so just return the parent since we are done + // We are outside of the tree, in which case we need to pointer to our parent node so we can + // we info (N, M) and we need a running context + if ( currentDepth < fillDepth ) { + // we need to create subnodes for each base, and propogate N and M down + final RecalDatumNode newParent = new RecalDatumNode(parent.getRecalDatum()); + + for ( final String base : Arrays.asList("A", "C", "G", "T")) { + ContextDatum subContext; + Set> subContexts; + + final RecalDatumNode subNode = findSubcontext(parent.getRecalDatum().context + base, parent); + if ( subNode != null ) { + // we have a subnode corresponding to the expected one, just copy and recurse + subContext = subNode.getRecalDatum(); + subContexts = subNode.getSubnodes(); + } else { + // have to create a new one + subContext = new ContextDatum(parent.getRecalDatum().context + base, + parent.getRecalDatum().getNumObservations(), parent.getRecalDatum().getNumMismatches()); + subContexts = Collections.emptySet(); + } + + newParent.addSubnode( + fillToDepthRec(new RecalDatumNode(subContext, subContexts), + fillDepth, currentDepth + 1)); + } + return newParent; + } else { + return parent; + } + } + + /** + * Go from a flat list of contexts to the tree implied by the contexts + * + * Implicit nodes are created as needed, and their observation and error counts are the sum of the + * all of their subnodes. + * + * Note this does not guarentee the tree is complete, as some contexts (e.g., AAT) may be missing + * from the tree because they are absent from the input list of contexts. + * + * For input AAG, AAT, AC, G would produce the following tree: + * + * - x [root] + * - A + * - A + * - T + * - G + * - C + * - G + * + * sets the fixed penalties in the resulting tree as well + * + * @param flatContexts list of flat contexts + * @return + */ + public static RecalDatumNode createTreeFromFlatContexts(final List flatContexts) { + if ( flatContexts == null || flatContexts.isEmpty() ) + throw new IllegalArgumentException("flatContexts cannot be empty or null"); + + final Queue> remaining = new LinkedList>(); + final Map> contextToNodes = new HashMap>(); + RecalDatumNode root = null; + + // initialize -- start with all of the contexts + for ( final ContextDatum cd : flatContexts ) + remaining.add(new RecalDatumNode(cd)); + + while ( remaining.peek() != null ) { + final RecalDatumNode add = remaining.poll(); + final ContextDatum cd = add.getRecalDatum(); + + final String parentContext = cd.getParentContext(); + RecalDatumNode parent = contextToNodes.get(parentContext); + if ( parent == null ) { + // haven't yet found parent, so make one, and enqueue it for processing + parent = new RecalDatumNode(new ContextDatum(parentContext, 0, 0)); + contextToNodes.put(parentContext, parent); + + if ( parentContext != ContextDatum.ROOT_CONTEXT ) + remaining.add(parent); + else + root = parent; + } + + parent.getRecalDatum().incrementNumObservations(cd.getNumObservations()); + parent.getRecalDatum().incrementNumMismatches(cd.getNumMismatches()); + parent.addSubnode(add); + } + + if ( root == null ) + throw new RuntimeException("root is unexpectedly null"); + + // set the fixed penalty everywhere in the tree, so that future modifications don't change the penalties + root.calcAndSetFixedPenalty(true); + + return root; + } + + /** + * Finds immediate subnode with contextToFind, or null if none exists + * + * @param tree whose subnodes should be searched + * @return + */ + public static RecalDatumNode findSubcontext(final String contextToFind, final RecalDatumNode tree) { + for ( final RecalDatumNode sub : tree.getSubnodes() ) + if ( sub.getRecalDatum().context.equals(contextToFind) ) + return sub; + return null; + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java index 3af91be16..1409af7d0 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java @@ -4,10 +4,12 @@ import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.apache.commons.math.stat.inference.ChiSquareTestImpl; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import java.util.Collection; import java.util.HashSet; +import java.util.LinkedList; import java.util.Set; /** @@ -17,13 +19,18 @@ import java.util.Set; * @since 07/27/12 */ public class RecalDatumNode { - private final static boolean USE_CHI2 = true; protected static Logger logger = Logger.getLogger(RecalDatumNode.class); + + /** + * fixedPenalty is this value if it's considered fixed + */ private final static double UNINITIALIZED = Double.NEGATIVE_INFINITY; + private final T recalDatum; private double fixedPenalty = UNINITIALIZED; private final Set> subnodes; + @Requires({"recalDatum != null"}) public RecalDatumNode(final T recalDatum) { this(recalDatum, new HashSet>()); } @@ -33,28 +40,45 @@ public class RecalDatumNode { return recalDatum.toString(); } + @Requires({"recalDatum != null", "subnodes != null"}) public RecalDatumNode(final T recalDatum, final Set> subnodes) { this(recalDatum, UNINITIALIZED, subnodes); } + @Requires({"recalDatum != null"}) protected RecalDatumNode(final T recalDatum, final double fixedPenalty) { this(recalDatum, fixedPenalty, new HashSet>()); } + @Requires({"recalDatum != null", "subnodes != null"}) protected RecalDatumNode(final T recalDatum, final double fixedPenalty, final Set> subnodes) { this.recalDatum = recalDatum; this.fixedPenalty = fixedPenalty; this.subnodes = new HashSet>(subnodes); } + /** + * Get the recal data associated with this node + * @return + */ + @Ensures("result != null") public T getRecalDatum() { return recalDatum; } + /** + * The set of all subnodes of this tree. May be modified. + * @return + */ + @Ensures("result != null") public Set> getSubnodes() { return subnodes; } + /** + * Return the fixed penalty, if set, or else the the calculated penalty for this node + * @return + */ public double getPenalty() { if ( fixedPenalty != UNINITIALIZED ) return fixedPenalty; @@ -62,6 +86,17 @@ public class RecalDatumNode { return calcPenalty(); } + /** + * Set the fixed penalty for this node to a fresh calculation from calcPenalty + * + * This is important in the case where you want to compute the penalty from a full + * tree and then chop the tree up afterwards while considering the previous penalties. + * If you don't call this function then manipulating the tree may result in the + * penalty functions changing with changes in the tree. + * + * @param doEntireTree recurse into all subnodes? + * @return the fixed penalty for this node + */ public double calcAndSetFixedPenalty(final boolean doEntireTree) { fixedPenalty = calcPenalty(); if ( doEntireTree ) @@ -70,15 +105,41 @@ public class RecalDatumNode { return fixedPenalty; } + /** + * Add node to the set of subnodes of this node + * @param sub + */ + @Requires("sub != null") public void addSubnode(final RecalDatumNode sub) { subnodes.add(sub); } + /** + * Is this a leaf node (i.e., has no subnodes)? + * @return + */ public boolean isLeaf() { return subnodes.isEmpty(); } - public int getNumBranches() { + /** + * Is this node immediately above only leaf nodes? + * + * @return + */ + public boolean isAboveOnlyLeaves() { + for ( final RecalDatumNode sub : subnodes ) + if ( ! sub.isLeaf() ) + return false; + return true; + } + + /** + * What's the immediate number of subnodes from this node? + * @return + */ + @Ensures("result >= 0") + public int getNumSubnodes() { return subnodes.size(); } @@ -89,6 +150,8 @@ public class RecalDatumNode { * definition have 0 penalty unless they represent a pruned tree with underlying -- but now * pruned -- subtrees * + * TODO -- can we really just add together the chi2 values? + * * @return */ public double totalPenalty() { @@ -102,6 +165,10 @@ public class RecalDatumNode { } } + /** + * What's the longest branch from this node to any leaf? + * @return + */ public int maxDepth() { int subMax = 0; for ( final RecalDatumNode sub : subnodes ) @@ -109,6 +176,11 @@ public class RecalDatumNode { return subMax + 1; } + /** + * What's the shortest branch from this node to any leaf? Includes this node + * @return + */ + @Ensures("result > 0") public int minDepth() { if ( isLeaf() ) return 1; @@ -120,6 +192,11 @@ public class RecalDatumNode { } } + /** + * Return the number of nodes, including this one, reachable from this node + * @return + */ + @Ensures("result > 0") public int size() { int size = 1; for ( final RecalDatumNode sub : subnodes ) @@ -127,6 +204,12 @@ public class RecalDatumNode { return size; } + /** + * Count the number of leaf nodes reachable from this node + * + * @return + */ + @Ensures("result >= 0") public int numLeaves() { if ( isLeaf() ) return 1; @@ -138,44 +221,37 @@ public class RecalDatumNode { } } + /** + * Calculate the chi^2 penalty among subnodes of this node. The chi^2 value + * indicates the degree of independence of the implied error rates among the + * immediate subnodes + * + * @return the chi2 penalty, or 0.0 if it cannot be calculated + */ private double calcPenalty() { - if ( USE_CHI2 ) - return calcPenaltyChi2(); - else - return calcPenaltyLog10(getRecalDatum().getEmpiricalErrorRate()); - } - - private double calcPenaltyChi2() { if ( isLeaf() ) return 0.0; + else if ( subnodes.size() == 1 ) + // only one value, so its free to merge away + return 0.0; else { final long[][] counts = new long[subnodes.size()][2]; int i = 0; - for ( RecalDatumNode subnode : subnodes ) { - counts[i][0] = subnode.getRecalDatum().getNumMismatches(); - counts[i][1] = subnode.getRecalDatum().getNumObservations(); + for ( final RecalDatumNode subnode : subnodes ) { + // use the yates correction to help avoid all zeros => NaN + counts[i][0] = subnode.getRecalDatum().getNumMismatches() + 1; + counts[i][1] = subnode.getRecalDatum().getNumObservations() + 2; i++; } final double chi2 = new ChiSquareTestImpl().chiSquare(counts); -// StringBuilder x = new StringBuilder(); -// StringBuilder y = new StringBuilder(); -// for ( int k = 0; k < counts.length; k++) { -// if ( k != 0 ) { -// x.append(", "); -// y.append(", "); -// } -// x.append(counts[k][0]); -// y.append(counts[k][1]); -// } -// logger.info("x = c(" + x.toString() + ")"); -// logger.info("y = c(" + y.toString() + ")"); -// logger.info("chi2 = " + chi2); + // make sure things are reasonable and fail early if not + if (Double.isInfinite(chi2) || Double.isNaN(chi2)) + throw new ReviewedStingException("chi2 value is " + chi2 + " at " + getRecalDatum()); return chi2; - //return Math.log10(chi2); } } @@ -216,11 +292,17 @@ public class RecalDatumNode { } } + /** + * Return a freshly allocated tree prunes to have no more than maxDepth from the root to any leaf + * + * @param maxDepth + * @return + */ public RecalDatumNode pruneToDepth(final int maxDepth) { if ( maxDepth < 1 ) throw new IllegalArgumentException("maxDepth < 1"); else { - final Set> subPruned = new HashSet>(getNumBranches()); + final Set> subPruned = new HashSet>(getNumSubnodes()); if ( maxDepth > 1 ) for ( final RecalDatumNode sub : subnodes ) subPruned.add(sub.pruneToDepth(maxDepth - 1)); @@ -228,12 +310,21 @@ public class RecalDatumNode { } } + /** + * Return a freshly allocated tree with to no more than maxElements in order of penalty + * + * Note that nodes must have fixed penalties to this algorithm will fail. + * + * @param maxElements + * @return + */ public RecalDatumNode pruneByPenalty(final int maxElements) { RecalDatumNode root = this; while ( root.size() > maxElements ) { // remove the lowest penalty element, and continue root = root.removeLowestPenaltyNode(); + logger.debug("pruneByPenalty root size is now " + root.size() + " of max " + maxElements); } // our size is below the target, so we are good, return @@ -241,15 +332,15 @@ public class RecalDatumNode { } /** - * Find the lowest penalty node in the tree, and return a tree without it + * Find the lowest penalty above leaf node in the tree, and return a tree without it * * Note this excludes the current (root) node * * @return */ private RecalDatumNode removeLowestPenaltyNode() { - final Pair, Double> nodeToRemove = getMinPenaltyNode(); - logger.info("Removing " + nodeToRemove.getFirst() + " with penalty " + nodeToRemove.getSecond()); + final Pair, Double> nodeToRemove = getMinPenaltyAboveLeafNode(); + //logger.info("Removing " + nodeToRemove.getFirst() + " with penalty " + nodeToRemove.getSecond()); final Pair, Boolean> result = removeNode(nodeToRemove.getFirst()); @@ -262,20 +353,37 @@ public class RecalDatumNode { return oneRemoved; } - private Pair, Double> getMinPenaltyNode() { - final double myValue = isLeaf() ? Double.MAX_VALUE : getPenalty(); - Pair, Double> maxNode = new Pair, Double>(this, myValue); - - for ( final RecalDatumNode sub : subnodes ) { - final Pair, Double> subFind = sub.getMinPenaltyNode(); - if ( subFind.getSecond() < maxNode.getSecond() ) { - maxNode = subFind; + /** + * Finds in the tree the node with the lowest penalty whose subnodes are all leaves + * + * @return + */ + private Pair, Double> getMinPenaltyAboveLeafNode() { + if ( isLeaf() ) + // not allowed to remove leafs directly + return null; + if ( isAboveOnlyLeaves() ) + // we only consider removing nodes above all leaves + return new Pair, Double>(this, getPenalty()); + else { + // just recurse, taking the result with the min penalty of all subnodes + Pair, Double> minNode = null; + for ( final RecalDatumNode sub : subnodes ) { + final Pair, Double> subFind = sub.getMinPenaltyAboveLeafNode(); + if ( subFind != null && (minNode == null || subFind.getSecond() < minNode.getSecond()) ) { + minNode = subFind; + } } + return minNode; } - - return maxNode; } + /** + * Return a freshly allocated tree without the node nodeToRemove + * + * @param nodeToRemove + * @return + */ private Pair, Boolean> removeNode(final RecalDatumNode nodeToRemove) { if ( this == nodeToRemove ) { if ( isLeaf() ) @@ -288,7 +396,7 @@ public class RecalDatumNode { boolean removedSomething = false; // our sub nodes with the penalty node removed - final Set> sub = new HashSet>(getNumBranches()); + final Set> sub = new HashSet>(getNumSubnodes()); for ( final RecalDatumNode sub1 : subnodes ) { if ( removedSomething ) { @@ -306,4 +414,29 @@ public class RecalDatumNode { return new Pair, Boolean>(node, removedSomething); } } + + /** + * Return a collection of all of the data in the leaf nodes of this tree + * + * @return + */ + public Collection getAllLeaves() { + final LinkedList list = new LinkedList(); + getAllLeavesRec(list); + return list; + } + + /** + * Helpful recursive function for getAllLeaves() + * + * @param list the destination for the list of leaves + */ + private void getAllLeavesRec(final LinkedList list) { + if ( isLeaf() ) + list.add(getRecalDatum()); + else { + for ( final RecalDatumNode sub : subnodes ) + sub.getAllLeavesRec(list); + } + } } diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/AdaptiveContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/AdaptiveContextUnitTest.java new file mode 100644 index 000000000..c07c084b8 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/AdaptiveContextUnitTest.java @@ -0,0 +1,64 @@ +package org.broadinstitute.sting.utils.recalibration; + +import org.broadinstitute.sting.BaseTest; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +/** + * User: depristo + * Date: 8/3/12 + * Time: 12:26 PM + * To change this template use File | Settings | File Templates. + */ +public class AdaptiveContextUnitTest { + // TODO + // TODO actually need unit tests when we have validated the value of this approach + // TODO particularly before we attempt to optimize the algorithm + // TODO + + // -------------------------------------------------------------------------------- + // + // Provider + // + // -------------------------------------------------------------------------------- + + private class AdaptiveContextTestProvider extends BaseTest.TestDataProvider { + final RecalDatumNode pruned; + final RecalDatumNode full; + + private AdaptiveContextTestProvider(Class c, RecalDatumNode pruned, RecalDatumNode full) { + super(AdaptiveContextTestProvider.class); + this.pruned = pruned; + this.full = full; + } + } + + private RecalDatumNode makeTree(final String context, final int N, final int M, + final RecalDatumNode ... sub) { + final ContextDatum contextDatum = new ContextDatum(context, N, M); + final RecalDatumNode node = new RecalDatumNode(contextDatum); + for ( final RecalDatumNode sub1 : sub ) { + node.addSubnode(sub1); + } + return node; + } + + @DataProvider(name = "AdaptiveContextTestProvider") + public Object[][] makeRecalDatumTestProvider() { +// final RecalDatumNode prune1 = +// makeTree("A", 10, 1, +// makeTree("AA", 11, 2), +// makeTree("AC", 12, 3), +// makeTree("AG", 13, 4), +// makeTree("AT", 14, 5)); +// +// new AdaptiveContextTestProvider(pruned, full); + + return AdaptiveContextTestProvider.getTests(AdaptiveContextTestProvider.class); + } + + @Test(dataProvider = "AdaptiveContextTestProvider") + public void testAdaptiveContextFill(AdaptiveContextTestProvider cfg) { + + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/QualQuantizerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/QualQuantizerUnitTest.java new file mode 100644 index 000000000..0ff2eaf03 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/QualQuantizerUnitTest.java @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// our package +package org.broadinstitute.sting.utils.recalibration; + + +// the imports for unit testing. + + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.Utils; +import org.testng.Assert; +import org.testng.annotations.BeforeSuite; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + + +public class QualQuantizerUnitTest extends BaseTest { + @BeforeSuite + public void before() { + + } + + // -------------------------------------------------------------------------------- + // + // merge case Provider + // + // -------------------------------------------------------------------------------- + + private class QualIntervalTestProvider extends TestDataProvider { + final QualQuantizer.QualInterval left, right; + int exError, exTotal, exQual; + double exErrorRate; + + private QualIntervalTestProvider(int leftE, int leftN, int rightE, int rightN, int exError, int exTotal) { + super(QualIntervalTestProvider.class); + + QualQuantizer qq = new QualQuantizer(0); + left = qq.new QualInterval(10, 10, leftN, leftE, 0); + right = qq.new QualInterval(11, 11, rightN, rightE, 0); + + this.exError = exError; + this.exTotal = exTotal; + this.exErrorRate = (leftE + rightE + 1) / (1.0 * (leftN + rightN + 1)); + this.exQual = QualityUtils.probToQual(1-this.exErrorRate, 0); + } + } + + @DataProvider(name = "QualIntervalTestProvider") + public Object[][] makeQualIntervalTestProvider() { + new QualIntervalTestProvider(10, 100, 10, 1000, 20, 1100); + new QualIntervalTestProvider(0, 100, 10, 900, 10, 1000); + new QualIntervalTestProvider(10, 900, 0, 100, 10, 1000); + new QualIntervalTestProvider(0, 0, 10, 100, 10, 100); + new QualIntervalTestProvider(1, 10, 9, 90, 10, 100); + new QualIntervalTestProvider(1, 10, 9, 100000, 10, 100010); + new QualIntervalTestProvider(1, 10, 9, 1000000, 10,1000010); + + return QualIntervalTestProvider.getTests(QualIntervalTestProvider.class); + } + + @Test(dataProvider = "QualIntervalTestProvider") + public void testQualInterval(QualIntervalTestProvider cfg) { + QualQuantizer.QualInterval merged = cfg.left.merge(cfg.right); + Assert.assertEquals(merged.nErrors, cfg.exError); + Assert.assertEquals(merged.nObservations, cfg.exTotal); + Assert.assertEquals(merged.getErrorRate(), cfg.exErrorRate); + Assert.assertEquals(merged.getQual(), cfg.exQual); + } + + @Test + public void testMinInterestingQual() { + for ( int q = 0; q < 15; q++ ) { + for ( int minQual = 0; minQual <= 10; minQual ++ ) { + QualQuantizer qq = new QualQuantizer(minQual); + QualQuantizer.QualInterval left = qq.new QualInterval(q, q, 100, 10, 0); + QualQuantizer.QualInterval right = qq.new QualInterval(q+1, q+1, 1000, 100, 0); + + QualQuantizer.QualInterval merged = left.merge(right); + boolean shouldBeFree = q+1 <= minQual; + if ( shouldBeFree ) + Assert.assertEquals(merged.getPenalty(), 0.0); + else + Assert.assertTrue(merged.getPenalty() > 0.0); + } + } + } + + + // -------------------------------------------------------------------------------- + // + // High-level case Provider + // + // -------------------------------------------------------------------------------- + + private class QuantizerTestProvider extends TestDataProvider { + final List nObservationsPerQual = new ArrayList(); + final int nLevels; + final List expectedMap; + + private QuantizerTestProvider(final List nObservationsPerQual, final int nLevels, final List expectedMap) { + super(QuantizerTestProvider.class); + + for ( int x : nObservationsPerQual ) + this.nObservationsPerQual.add((long)x); + this.nLevels = nLevels; + this.expectedMap = expectedMap; + } + + @Override + public String toString() { + return String.format("QQTest nLevels=%d nObs=[%s] map=[%s]", + nLevels, Utils.join(",", nObservationsPerQual), Utils.join(",", expectedMap)); + } + } + + @DataProvider(name = "QuantizerTestProvider") + public Object[][] makeQuantizerTestProvider() { + List allQ2 = Arrays.asList(0, 0, 1000, 0, 0); + + new QuantizerTestProvider(allQ2, 5, Arrays.asList(0, 1, 2, 3, 4)); + new QuantizerTestProvider(allQ2, 1, Arrays.asList(2, 2, 2, 2, 2)); + + new QuantizerTestProvider(Arrays.asList(0, 0, 1000, 0, 1000), 2, Arrays.asList(2, 2, 2, 2, 4)); + new QuantizerTestProvider(Arrays.asList(0, 0, 1000, 1, 1000), 2, Arrays.asList(2, 2, 2, 4, 4)); + new QuantizerTestProvider(Arrays.asList(0, 0, 1000, 10, 1000), 2, Arrays.asList(2, 2, 2, 2, 4)); + + return QuantizerTestProvider.getTests(QuantizerTestProvider.class); + } + + @Test(dataProvider = "QuantizerTestProvider", enabled = true) + public void testQuantizer(QuantizerTestProvider cfg) { + QualQuantizer qq = new QualQuantizer(cfg.nObservationsPerQual, cfg.nLevels, 0); + logger.warn("cfg: " + cfg); + for ( int i = 0; i < cfg.expectedMap.size(); i++) { + int expected = cfg.expectedMap.get(i); + int observed = qq.originalToQuantizedMap.get(i); + //logger.warn(String.format(" qq map: %s : %d => %d", i, expected, observed)); + Assert.assertEquals(observed, expected); + } + } +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java new file mode 100644 index 000000000..33985e0ac --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// our package +package org.broadinstitute.sting.utils.recalibration; + + +// the imports for unit testing. + + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.Utils; +import org.testng.Assert; +import org.testng.annotations.BeforeSuite; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + + +public class RecalDatumUnitTest extends BaseTest { + + // -------------------------------------------------------------------------------- + // + // merge case Provider + // + // -------------------------------------------------------------------------------- + + private class RecalDatumTestProvider extends TestDataProvider { + int exError, exTotal, reportedQual; + + private RecalDatumTestProvider(int E, int N, int reportedQual) { + super(RecalDatumTestProvider.class); + + this.exError = E; + this.exTotal = N; + this.reportedQual = reportedQual; + } + + public double getErrorRate() { + return (exError + 1) / (1.0 * (exTotal + 2)); + } + + public double getErrorRatePhredScaled() { + return QualityUtils.phredScaleErrorRate(getErrorRate()); + } + + public int getReportedQual() { + return reportedQual; + } + + public RecalDatum makeRecalDatum() { + return new RecalDatum(exTotal, exError, (byte)getReportedQual()); + } + + @Override + public String toString() { + return String.format("exError=%d, exTotal=%d, reportedQual=%d", exError, exTotal, reportedQual); + } + } + + @DataProvider(name = "RecalDatumTestProvider") + public Object[][] makeRecalDatumTestProvider() { + for ( int E : Arrays.asList(1, 10, 100, 1000, 10000) ) + for ( int N : Arrays.asList(10, 100, 1000, 10000, 100000, 1000000) ) + for ( int reportedQual : Arrays.asList(10, 20) ) + if ( E <= N ) + new RecalDatumTestProvider(E, N, reportedQual); + return RecalDatumTestProvider.getTests(RecalDatumTestProvider.class); + } + + @Test(dataProvider = "RecalDatumTestProvider") + public void testRecalDatumBasics(RecalDatumTestProvider cfg) { + final RecalDatum datum = cfg.makeRecalDatum(); + assertBasicFeaturesOfRecalDatum(datum, cfg); + } + + private static void assertBasicFeaturesOfRecalDatum(final RecalDatum datum, final RecalDatumTestProvider cfg) { + Assert.assertEquals(datum.getNumMismatches(), cfg.exError); + Assert.assertEquals(datum.getNumObservations(), cfg.exTotal); + if ( cfg.getReportedQual() != -1 ) + Assert.assertEquals(datum.getEstimatedQReportedAsByte(), cfg.getReportedQual()); + BaseTest.assertEqualsDoubleSmart(datum.getEmpiricalQuality(), cfg.getErrorRatePhredScaled()); + BaseTest.assertEqualsDoubleSmart(datum.getEmpiricalErrorRate(), cfg.getErrorRate()); + } + + @Test(dataProvider = "RecalDatumTestProvider") + public void testRecalDatumCopyAndCombine(RecalDatumTestProvider cfg) { + final RecalDatum datum = cfg.makeRecalDatum(); + final RecalDatum copy = new RecalDatum(datum); + assertBasicFeaturesOfRecalDatum(copy, cfg); + + RecalDatumTestProvider combinedCfg = new RecalDatumTestProvider(cfg.exError * 2, cfg.exTotal * 2, cfg.reportedQual); + copy.combine(datum); + assertBasicFeaturesOfRecalDatum(copy, combinedCfg); + } + + @Test(dataProvider = "RecalDatumTestProvider") + public void testRecalDatumModification(RecalDatumTestProvider cfg) { + RecalDatum datum = cfg.makeRecalDatum(); + datum.setEmpiricalQuality(10.1); + Assert.assertEquals(datum.getEmpiricalQuality(), 10.1); + + datum.setEstimatedQReported(10.1); + Assert.assertEquals(datum.getEstimatedQReported(), 10.1); + Assert.assertEquals(datum.getEstimatedQReportedAsByte(), 10); + + datum = cfg.makeRecalDatum(); + cfg.exTotal = 100000; + datum.setNumObservations(cfg.exTotal); + assertBasicFeaturesOfRecalDatum(datum, cfg); + + datum = cfg.makeRecalDatum(); + cfg.exError = 1000; + datum.setNumMismatches(cfg.exError); + assertBasicFeaturesOfRecalDatum(datum, cfg); + + datum = cfg.makeRecalDatum(); + datum.increment(true); + cfg.exError++; + cfg.exTotal++; + assertBasicFeaturesOfRecalDatum(datum, cfg); + + datum = cfg.makeRecalDatum(); + datum.increment(10, 5); + cfg.exError += 5; + cfg.exTotal += 10; + assertBasicFeaturesOfRecalDatum(datum, cfg); + } +} \ No newline at end of file From b7eec2fd0eb5acfd0f29c970717827f9d8d13100 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Sun, 5 Aug 2012 12:29:10 -0400 Subject: [PATCH 063/176] Bug fixes related to the changes in allele padding. If a haplotype started with an insertion it led to array index out of bounds. Haplotype allele insert function is now very simple because all alleles are treated the same way. HaplotypeUnitTest now uses a variant context instead of creating Allele objects directly. --- .../haplotypecaller/GenotypingEngine.java | 31 ++++------- .../SimpleDeBruijnAssembler.java | 5 +- .../VariantRecalibrator.java | 2 +- .../broadinstitute/sting/utils/Haplotype.java | 51 ++++--------------- .../sting/utils/HaplotypeUnitTest.java | 39 +++++++------- 5 files changed, 47 insertions(+), 81 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index 2787689b5..52c13d124 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -533,27 +533,18 @@ public class GenotypingEngine { final int elementLength = ce.getLength(); switch( ce.getOperator() ) { case I: - final byte[] insertionBases = Arrays.copyOfRange( alignment, alignmentPos - 1, alignmentPos + elementLength ); // add padding base - boolean allN = true; - for( int i = 1; i < insertionBases.length; i++ ) { // check all bases except for the padding base - if( insertionBases[i] != (byte) 'N' ) { - allN = false; - break; - } - } - if( !allN ) { - final ArrayList insertionAlleles = new ArrayList(); - final int insertionStart = refLoc.getStart() + refPos - 1; - insertionAlleles.add( Allele.create(ref[refPos-1], true) ); - if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) { - insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE ); - vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make()); - } else { - insertionAlleles.add( Allele.create(insertionBases, false) ); - vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make()); - } - + final ArrayList insertionAlleles = new ArrayList(); + final int insertionStart = refLoc.getStart() + refPos - 1; + insertionAlleles.add( Allele.create(ref[refPos-1], true) ); + if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) { + insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE ); + } else { + byte[] insertionBases = new byte[]{}; + insertionBases = ArrayUtils.add(insertionBases, ref[refPos-1]); // add the padding base + insertionBases = ArrayUtils.addAll(insertionBases, Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength )); + insertionAlleles.add( Allele.create(insertionBases, false) ); } + vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make()); alignmentPos += elementLength; break; case S: diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java index e2bc7a10f..be6c4a51f 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java @@ -281,7 +281,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() ); if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) { if( !activeAllelesToGenotype.isEmpty() ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present - final HashMap eventMap = GenotypingEngine.generateVCsFromAlignment( h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly", 0 ); // BUGBUG: need to put this function in a shared place + final HashMap eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly", 0 ); // BUGBUG: need to put this function in a shared place for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart()); if( vcOnHaplotype == null || !vcOnHaplotype.hasSameAllelesAs(compVC) ) { @@ -311,7 +311,8 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { } private boolean addHaplotype( final Haplotype haplotype, final byte[] ref, final ArrayList haplotypeList, final int activeRegionStart, final int activeRegionStop ) { - //final int sizeOfActiveRegion = activeRegionStop - activeRegionStart; + if( haplotype == null ) { return false; } + final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND ); haplotype.setAlignmentStartHapwrtRef( swConsensus.getAlignmentStart2wrt1() ); haplotype.setCigar( AlignmentUtils.leftAlignIndel(swConsensus.getCigar(), ref, haplotype.getBases(), swConsensus.getAlignmentStart2wrt1(), 0) ); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index ab2ff6176..c670ad2fd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -331,7 +331,7 @@ public class VariantRecalibrator extends RodWalker= 0"}) - public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation ) { - - if( refAllele.length() != altAllele.length() ) { refInsertLocation++; } + public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, final int refInsertLocation ) { + // refInsertLocation is in ref haplotype offset coordinates NOT genomic coordinates final int haplotypeInsertLocation = ReadUtils.getReadCoordinateForReferenceCoordinate(alignmentStartHapwrtRef, cigar, refInsertLocation, ReadUtils.ClippingTail.RIGHT_TAIL, true); - if( haplotypeInsertLocation == -1 ) { // desired change falls inside deletion so don't bother creating a new haplotype - return new Haplotype(bases.clone()); + if( haplotypeInsertLocation == -1 || haplotypeInsertLocation + refAllele.length() >= bases.length ) { // desired change falls inside deletion so don't bother creating a new haplotype + return null; } - byte[] newHaplotype; - - try { - if( refAllele.length() == altAllele.length() ) { // SNP or MNP - newHaplotype = bases.clone(); - for( int iii = 0; iii < altAllele.length(); iii++ ) { - newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii]; - } - } else if( refAllele.length() < altAllele.length() ) { // insertion - final int altAlleleLength = altAllele.length() - 1; - newHaplotype = new byte[bases.length + altAlleleLength]; - for( int iii = 0; iii < bases.length; iii++ ) { - newHaplotype[iii] = bases[iii]; - } - for( int iii = newHaplotype.length - 1; iii > haplotypeInsertLocation + altAlleleLength - 1; iii-- ) { - newHaplotype[iii] = newHaplotype[iii-altAlleleLength]; - } - for( int iii = 0; iii < altAlleleLength; iii++ ) { - newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii+1]; - } - } else { // deletion - final int shift = refAllele.length() - altAllele.length(); - final int altAlleleLength = altAllele.length() - 1; - newHaplotype = new byte[bases.length - shift]; - for( int iii = 0; iii < haplotypeInsertLocation + altAlleleLength; iii++ ) { - newHaplotype[iii] = bases[iii]; - } - for( int iii = haplotypeInsertLocation + altAlleleLength; iii < newHaplotype.length; iii++ ) { - newHaplotype[iii] = bases[iii+shift]; - } - } - } catch (Exception e) { // event already on haplotype is too large/complex to insert another allele, most likely because of not enough reference padding - return new Haplotype(bases.clone()); - } - - return new Haplotype(newHaplotype); + byte[] newHaplotypeBases = new byte[]{}; + newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, 0, haplotypeInsertLocation)); // bases before the variant + newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, altAllele.getBases()); // the alt allele of the variant + newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, haplotypeInsertLocation + refAllele.length(), bases.length)); // bases after the variant + return new Haplotype(newHaplotypeBases); } public static LinkedHashMap makeHaplotypeListFromAlleles(final List alleleList, diff --git a/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java index 161eefa8f..ddffb6e4c 100644 --- a/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java @@ -31,6 +31,8 @@ import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -53,11 +55,11 @@ public class HaplotypeUnitTest extends BaseTest { h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M)); final Cigar h1Cigar = new Cigar(h1CigarList); String h1bases = "AACTTCTGGTCAACTGGTCAACTGGTCAACTGGTCA"; - basicInsertTest("A", "AACTT", 1, h1Cigar, bases, h1bases); - h1bases = "ACTGGTCACTTAACTGGTCAACTGGTCAACTGGTCA"; + basicInsertTest("A", "AACTT", 0, h1Cigar, bases, h1bases); + h1bases = "ACTGGTCAACTTACTGGTCAACTGGTCAACTGGTCA"; basicInsertTest("A", "AACTT", 7, h1Cigar, bases, h1bases); h1bases = "ACTGGTCAACTGGTCAAACTTCTGGTCAACTGGTCA"; - basicInsertTest("A", "AACTT", 17, h1Cigar, bases, h1bases); + basicInsertTest("A", "AACTT", 16, h1Cigar, bases, h1bases); } @Test @@ -68,11 +70,11 @@ public class HaplotypeUnitTest extends BaseTest { h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M)); final Cigar h1Cigar = new Cigar(h1CigarList); String h1bases = "ATCAACTGGTCAACTGGTCAACTGGTCA"; - basicInsertTest("AACTT", "A", 1, h1Cigar, bases, h1bases); - h1bases = "ACTGGTCGGTCAACTGGTCAACTGGTCA"; - basicInsertTest("AACTT", "A", 7, h1Cigar, bases, h1bases); + basicInsertTest("ACTGG", "A", 0, h1Cigar, bases, h1bases); + h1bases = "ACTGGTCAGTCAACTGGTCAACTGGTCA"; + basicInsertTest("AACTG", "A", 7, h1Cigar, bases, h1bases); h1bases = "ACTGGTCAACTGGTCAATCAACTGGTCA"; - basicInsertTest("AACTT", "A", 17, h1Cigar, bases, h1bases); + basicInsertTest("ACTGG", "A", 16, h1Cigar, bases, h1bases); } @Test @@ -102,11 +104,11 @@ public class HaplotypeUnitTest extends BaseTest { h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M)); final Cigar h1Cigar = new Cigar(h1CigarList); String h1bases = "AACTTTCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; - basicInsertTest("A", "AACTT", 1, h1Cigar, bases, h1bases); + basicInsertTest("A", "AACTT", 0, h1Cigar, bases, h1bases); h1bases = "ATCG" + "CCGGCCGGCC" + "ATCACTTGATCG" + "AGGGGGA" + "AGGC"; - basicInsertTest("A", "AACTT", 7, h1Cigar, bases, h1bases); + basicInsertTest("C", "CACTT", 6, h1Cigar, bases, h1bases); h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGACTTGGGGA" + "AGGC"; - basicInsertTest("A", "AACTT", 17, h1Cigar, bases, h1bases); + basicInsertTest("G", "GACTT", 16, h1Cigar, bases, h1bases); } @Test @@ -120,12 +122,12 @@ public class HaplotypeUnitTest extends BaseTest { h1CigarList.add(new CigarElement(3, CigarOperator.D)); h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M)); final Cigar h1Cigar = new Cigar(h1CigarList); - String h1bases = "A" + "CGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; - basicInsertTest("AACTT", "A", 1, h1Cigar, bases, h1bases); - h1bases = "ATCG" + "CCGGCCGGCC" + "ATCG" + "AGGGGGA" + "AGGC"; - basicInsertTest("AACTT", "A", 7, h1Cigar, bases, h1bases); + String h1bases = "A" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; + basicInsertTest("ATCG", "A", 0, h1Cigar, bases, h1bases); + h1bases = "ATCG" + "CCGGCCGGCC" + "ATAAAG" + "AGGGGGA" + "AGGC"; + basicInsertTest("CGATC", "AAA", 6, h1Cigar, bases, h1bases); h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGA" + "AGGC"; - basicInsertTest("AACTT", "A", 17, h1Cigar, bases, h1bases); + basicInsertTest("GGGGG", "G", 16, h1Cigar, bases, h1bases); } @Test @@ -148,13 +150,16 @@ public class HaplotypeUnitTest extends BaseTest { } private void basicInsertTest(String ref, String alt, int loc, Cigar cigar, String hap, String newHap) { - final int INDEL_PADDING_BASE = (ref.length() == alt.length() ? 0 : 1); final Haplotype h = new Haplotype(hap.getBytes()); final Allele h1refAllele = Allele.create(ref, true); final Allele h1altAllele = Allele.create(alt, false); + final ArrayList alleles = new ArrayList(); + alleles.add(h1refAllele); + alleles.add(h1altAllele); + final VariantContext vc = new VariantContextBuilder().alleles(alleles).loc("1", loc, loc + h1refAllele.getBases().length - 1).make(); h.setAlignmentStartHapwrtRef(0); h.setCigar(cigar); - final Haplotype h1 = h.insertAllele(h1refAllele, h1altAllele, loc - INDEL_PADDING_BASE); + final Haplotype h1 = h.insertAllele(vc.getReference(), vc.getAlternateAllele(0), loc); final Haplotype h1expected = new Haplotype(newHap.getBytes()); Assert.assertEquals(h1, h1expected); } From 8f95a03bb61ae886a7ead3d7e836766d964c4d06 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 6 Aug 2012 11:19:54 -0400 Subject: [PATCH 064/176] Prevent NumberFormatExceptions when parsing the VCF POS field --- .../sting/utils/codecs/vcf/AbstractVCFCodec.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 996cef8a4..043e5e185 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -237,7 +237,12 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec // parse out the required fields final String chr = getCachedString(parts[0]); builder.chr(chr); - int pos = Integer.valueOf(parts[1]); + int pos = -1; + try { + pos = Integer.valueOf(parts[1]); + } catch (NumberFormatException e) { + generateException(parts[1] + " is not a valid start position in the VCF format"); + } builder.start(pos); if ( parts[2].length() == 0 ) From 210db5ec271a0071b87b3d138807723b4f6bef7c Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 6 Aug 2012 11:31:18 -0400 Subject: [PATCH 065/176] Update -maxAlleles argument to -maxAltAlleles to make it more accurate. The hidden GSA production -capMaxAllelesForIndels argument also gets updated. --- .../gatk/walkers/genotyper/UnifiedArgumentCollection.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 020f7904d..69f1176cc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -114,11 +114,11 @@ public class UnifiedArgumentCollection { * that you not play around with this parameter. */ @Advanced - @Argument(fullName = "max_alternate_alleles", shortName = "maxAlleles", doc = "Maximum number of alternate alleles to genotype", required = false) + @Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false) public int MAX_ALTERNATE_ALLELES = 3; @Hidden - @Argument(fullName = "cap_max_alternate_alleles_for_indels", shortName = "capMaxAllelesForIndels", doc = "Cap the maximum number of alternate alleles to genotype for indel calls at 2; overrides the --max_alternate_alleles argument; GSA production use only", required = false) + @Argument(fullName = "cap_max_alternate_alleles_for_indels", shortName = "capMaxAltAllelesForIndels", doc = "Cap the maximum number of alternate alleles to genotype for indel calls at 2; overrides the --max_alternate_alleles argument; GSA production use only", required = false) public boolean CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS = false; // indel-related arguments From 973d1d47ed8222c9db1695c008f72282be83b102 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Mon, 6 Aug 2012 11:40:07 -0400 Subject: [PATCH 066/176] Merging together the computeDiploidHaplotypeLikelihoods functions in the HaplotypeCaller's LikelihoodEngine so they both benefit from the ReducedRead's RepresentativeCount --- .../haplotypecaller/HaplotypeCaller.java | 6 +- .../LikelihoodCalculationEngine.java | 85 +++++++------------ 2 files changed, 34 insertions(+), 57 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 19bd29564..70ed9c242 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -357,9 +357,9 @@ public class HaplotypeCaller extends ActiveRegionWalker implem } } } - genotypeLikelihoods[AA] += p.getRepresentativeCount()* QualityUtils.qualToProbLog10(qual); - genotypeLikelihoods[AB] += p.getRepresentativeCount()* MathUtils.approximateLog10SumLog10( QualityUtils.qualToProbLog10(qual) + LOG_ONE_HALF, QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD + LOG_ONE_HALF ); - genotypeLikelihoods[BB] += p.getRepresentativeCount()* QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD; + genotypeLikelihoods[AA] += p.getRepresentativeCount() * QualityUtils.qualToProbLog10(qual); + genotypeLikelihoods[AB] += p.getRepresentativeCount() * MathUtils.approximateLog10SumLog10( QualityUtils.qualToProbLog10(qual) + LOG_ONE_HALF, QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD + LOG_ONE_HALF ); + genotypeLikelihoods[BB] += p.getRepresentativeCount() * QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD; } } genotypes.add( new GenotypeBuilder(sample).alleles(noCall).PL(genotypeLikelihoods).make() ); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index a3179681e..939b3c375 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -59,7 +59,7 @@ public class LikelihoodCalculationEngine { } } int Y_METRIC_LENGTH = 0; - for( Haplotype h: haplotypes ) { + for( final Haplotype h : haplotypes ) { final int haplotypeLength = h.getBases().length; if( haplotypeLength > Y_METRIC_LENGTH ) { Y_METRIC_LENGTH = haplotypeLength; } } @@ -92,7 +92,7 @@ public class LikelihoodCalculationEngine { final int[][] readCounts = new int[numHaplotypes][numReads]; for( int iii = 0; iii < numReads; iii++ ) { final GATKSAMRecord read = reads.get(iii); - final int readCount = getRepresentativeReadCount(read); + final int readCount = getRepresentativeReadCount(read); final byte[] overallGCP = new byte[read.getReadLength()]; Arrays.fill( overallGCP, constantGCP ); // Is there a way to derive empirical estimates for this from the data? @@ -153,10 +153,20 @@ public class LikelihoodCalculationEngine { } return computeDiploidHaplotypeLikelihoods( sample, haplotypeMapping ); } - + + // This function takes just a single sample and a haplotypeMapping @Requires({"haplotypeMapping.size() > 0"}) @Ensures({"result.length == result[0].length", "result.length == haplotypeMapping.size()"}) public static double[][] computeDiploidHaplotypeLikelihoods( final String sample, final ArrayList> haplotypeMapping ) { + final TreeSet sampleSet = new TreeSet(); + sampleSet.add(sample); + return computeDiploidHaplotypeLikelihoods(sampleSet, haplotypeMapping); + } + + // This function takes a set of samples to pool over and a haplotypeMapping + @Requires({"haplotypeMapping.size() > 0"}) + @Ensures({"result.length == result[0].length", "result.length == haplotypeMapping.size()"}) + public static double[][] computeDiploidHaplotypeLikelihoods( final Set samples, final ArrayList> haplotypeMapping ) { final int numHaplotypes = haplotypeMapping.size(); final double[][] haplotypeLikelihoodMatrix = new double[numHaplotypes][numHaplotypes]; @@ -169,16 +179,18 @@ public class LikelihoodCalculationEngine { for( int iii = 0; iii < numHaplotypes; iii++ ) { for( int jjj = 0; jjj <= iii; jjj++ ) { for( final Haplotype iii_mapped : haplotypeMapping.get(iii) ) { - final double[] readLikelihoods_iii = iii_mapped.getReadLikelihoods(sample); - final int[] readCounts_iii = iii_mapped.getReadCounts(sample); for( final Haplotype jjj_mapped : haplotypeMapping.get(jjj) ) { - final double[] readLikelihoods_jjj = jjj_mapped.getReadLikelihoods(sample); double haplotypeLikelihood = 0.0; - for( int kkk = 0; kkk < readLikelihoods_iii.length; kkk++ ) { - // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2) - // log10(10^(a*x1) + 10^(b*x2)) - // First term is approximated by Jacobian log with table lookup. - haplotypeLikelihood +=readCounts_iii[kkk] *( MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF); + for( final String sample : samples ) { + final double[] readLikelihoods_iii = iii_mapped.getReadLikelihoods(sample); + final int[] readCounts_iii = iii_mapped.getReadCounts(sample); + final double[] readLikelihoods_jjj = jjj_mapped.getReadLikelihoods(sample); + for( int kkk = 0; kkk < readLikelihoods_iii.length; kkk++ ) { + // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2) + // log10(10^(a*x1) + 10^(b*x2)) ??? + // First term is approximated by Jacobian log with table lookup. + haplotypeLikelihood += readCounts_iii[kkk] * ( MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF ); + } } haplotypeLikelihoodMatrix[iii][jjj] = Math.max(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // MathUtils.approximateLog10SumLog10(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // BUGBUG: max or sum? } @@ -190,48 +202,6 @@ public class LikelihoodCalculationEngine { return normalizeDiploidLikelihoodMatrixFromLog10( haplotypeLikelihoodMatrix ); } - @Requires({"haplotypes.size() > 0"}) - @Ensures({"result.length == result[0].length", "result.length == haplotypes.size()"}) - public static double[][] computeDiploidHaplotypeLikelihoods( final ArrayList haplotypes, final Set samples ) { - // set up the default 1-to-1 haplotype mapping object, BUGBUG: target for future optimization? - final ArrayList> haplotypeMapping = new ArrayList>(); - for( final Haplotype h : haplotypes ) { - final ArrayList list = new ArrayList(); - list.add(h); - haplotypeMapping.add(list); - } - - final int numHaplotypes = haplotypeMapping.size(); - final double[][] haplotypeLikelihoodMatrix = new double[numHaplotypes][numHaplotypes]; - for( int iii = 0; iii < numHaplotypes; iii++ ) { - Arrays.fill(haplotypeLikelihoodMatrix[iii], Double.NEGATIVE_INFINITY); - } - - // compute the diploid haplotype likelihoods - for( int iii = 0; iii < numHaplotypes; iii++ ) { - for( int jjj = 0; jjj <= iii; jjj++ ) { - for( final Haplotype iii_mapped : haplotypeMapping.get(iii) ) { - for( final Haplotype jjj_mapped : haplotypeMapping.get(jjj) ) { - double haplotypeLikelihood = 0.0; - for( final String sample : samples ) { - final double[] readLikelihoods_iii = iii_mapped.getReadLikelihoods(sample); - final double[] readLikelihoods_jjj = jjj_mapped.getReadLikelihoods(sample); - for( int kkk = 0; kkk < readLikelihoods_iii.length; kkk++ ) { - // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2) - // First term is approximated by Jacobian log with table lookup. - haplotypeLikelihood += MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF; - } - } - haplotypeLikelihoodMatrix[iii][jjj] = Math.max(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // MathUtils.approximateLog10SumLog10(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // BUGBUG: max or sum? - } - } - } - } - - // normalize the diploid likelihoods matrix - return normalizeDiploidLikelihoodMatrixFromLog10( haplotypeLikelihoodMatrix ); - } - @Requires({"likelihoodMatrix.length == likelihoodMatrix[0].length"}) @Ensures({"result.length == result[0].length", "result.length == likelihoodMatrix.length"}) protected static double[][] normalizeDiploidLikelihoodMatrixFromLog10( final double[][] likelihoodMatrix ) { @@ -320,7 +290,14 @@ public class LikelihoodCalculationEngine { final Set sampleKeySet = haplotypes.get(0).getSampleKeySet(); // BUGBUG: assume all haplotypes saw the same samples final ArrayList bestHaplotypesIndexList = new ArrayList(); bestHaplotypesIndexList.add(0); // always start with the reference haplotype - final double[][] haplotypeLikelihoodMatrix = computeDiploidHaplotypeLikelihoods( haplotypes, sampleKeySet ); // all samples pooled together + // set up the default 1-to-1 haplotype mapping object + final ArrayList> haplotypeMapping = new ArrayList>(); + for( final Haplotype h : haplotypes ) { + final ArrayList list = new ArrayList(); + list.add(h); + haplotypeMapping.add(list); + } + final double[][] haplotypeLikelihoodMatrix = computeDiploidHaplotypeLikelihoods( sampleKeySet, haplotypeMapping ); // all samples pooled together int hap1 = 0; int hap2 = 0; From b4841548f12086e8928d4c749bf66fa29f2eaf4f Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 3 Aug 2012 17:53:46 -0400 Subject: [PATCH 067/176] Bug fixes and misc. improvements to running the adaptive context tools -- Better output file name defaults -- Fixed nasty bug where I included non-existant quals in the contexts to process because they showed up in the Cycle covariate -- Data is processed in qual order now, so it's easier to see progress -- Logger messages explaining where we are in the process -- When in UPDATE mode we still write out the information for an equivalent prune by depth for post analysis --- .../sting/utils/recalibration/RecalDatumNode.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java index 1409af7d0..e6ec6a520 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java @@ -324,7 +324,8 @@ public class RecalDatumNode { while ( root.size() > maxElements ) { // remove the lowest penalty element, and continue root = root.removeLowestPenaltyNode(); - logger.debug("pruneByPenalty root size is now " + root.size() + " of max " + maxElements); + if ( logger.isDebugEnabled() ) + logger.debug("pruneByPenalty root size is now " + root.size() + " of max " + maxElements); } // our size is below the target, so we are good, return From 7bf5ca51eee6c8157e1b8e8a76aad88eafeee0c4 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sat, 4 Aug 2012 11:28:17 -0400 Subject: [PATCH 068/176] Major bugfix for adaptive contexts -- Basically I was treating the context history in the wrong direction, effectively predicting the further bases in the context based on the closer one. Totally backward. Updated the code to build the tree in the right direction. -- Added a few more useful outputs for analysis (minPenalty and maxPenalty) -- Misc. cleanup of the code -- Overall I'm not 100% certain this is even the right way to think about the problem. Clearly this is producing a reasonable output but the sum of chi2 values over the entire tree is just enormous. Perhaps a MCMC convergence / sampling criterion would be a better way to think about this problem? --- .../utils/recalibration/AdaptiveContext.java | 37 +++++++++++++++---- .../utils/recalibration/RecalDatumNode.java | 22 +++++++++++ 2 files changed, 51 insertions(+), 8 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/AdaptiveContext.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/AdaptiveContext.java index 083b8af64..b8a2eaf05 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/AdaptiveContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/AdaptiveContext.java @@ -21,22 +21,42 @@ public class AdaptiveContext { * observation and error counts of their parent. * * This algorithm produces data consistent with the standard output in a BQSR recal - * file for the Context covariate + * file for the Context covariate. + * + * Suppose you have input tree + * + * - x + * - A + * - C + * - T + * - C + * - T + * + * This will produce the following contexts with fill depth of 2 + * + * AA <- gets A info + * CA <- gets CA info + * GA <- gets A info + * TA <- get TA info + * .. for all 16 contexts * * @param root * @param fillDepth * @return */ - public static RecalDatumNode fillToDepth(final RecalDatumNode root, final int fillDepth) { + public static RecalDatumNode fillToDepth(final RecalDatumNode root, + final int fillDepth, + final boolean debugWriteN) { if ( root == null ) throw new IllegalArgumentException("root is null"); if ( fillDepth < 0 ) throw new IllegalArgumentException("fillDepth is < 0"); - return fillToDepthRec(root, fillDepth, 0); + return fillToDepthRec(root, fillDepth, 0, debugWriteN); } private static RecalDatumNode fillToDepthRec(final RecalDatumNode parent, final int fillDepth, - final int currentDepth) { + final int currentDepth, + final boolean debugWriteN) { // three cases: // We are in the tree and so just recursively build // We have reached our depth goal, so just return the parent since we are done @@ -47,24 +67,25 @@ public class AdaptiveContext { final RecalDatumNode newParent = new RecalDatumNode(parent.getRecalDatum()); for ( final String base : Arrays.asList("A", "C", "G", "T")) { + final String subContextBases = base + parent.getRecalDatum().context; ContextDatum subContext; Set> subContexts; - final RecalDatumNode subNode = findSubcontext(parent.getRecalDatum().context + base, parent); + final RecalDatumNode subNode = findSubcontext(subContextBases, parent); if ( subNode != null ) { // we have a subnode corresponding to the expected one, just copy and recurse subContext = subNode.getRecalDatum(); subContexts = subNode.getSubnodes(); } else { // have to create a new one - subContext = new ContextDatum(parent.getRecalDatum().context + base, + subContext = new ContextDatum(debugWriteN ? ("N" + parent.getRecalDatum().context ) : subContextBases, parent.getRecalDatum().getNumObservations(), parent.getRecalDatum().getNumMismatches()); subContexts = Collections.emptySet(); } newParent.addSubnode( fillToDepthRec(new RecalDatumNode(subContext, subContexts), - fillDepth, currentDepth + 1)); + fillDepth, currentDepth + 1, debugWriteN)); } return newParent; } else { @@ -119,7 +140,7 @@ public class AdaptiveContext { parent = new RecalDatumNode(new ContextDatum(parentContext, 0, 0)); contextToNodes.put(parentContext, parent); - if ( parentContext != ContextDatum.ROOT_CONTEXT ) + if ( ! parent.getRecalDatum().isRootContext() ) remaining.add(parent); else root = parent; diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java index e6ec6a520..e792a808d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java @@ -165,6 +165,28 @@ public class RecalDatumNode { } } + /** + * The maximum penalty among all nodes + * @return + */ + public double maxPenalty() { + double max = getPenalty(); + for ( final RecalDatumNode sub : subnodes ) + max = Math.max(max, sub.maxPenalty()); + return max; + } + + /** + * The minimum penalty among all nodes + * @return + */ + public double minPenalty() { + double min = getPenalty(); + for ( final RecalDatumNode sub : subnodes ) + min = Math.min(min, sub.minPenalty()); + return min; + } + /** * What's the longest branch from this node to any leaf? * @return From 2f004665fb1f51728cdb586479ae7202d1c2e7cc Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 6 Aug 2012 11:42:12 -0400 Subject: [PATCH 069/176] Fixing public -> private dep --- .../utils/recalibration/AdaptiveContext.java | 175 ------------------ 1 file changed, 175 deletions(-) delete mode 100644 public/java/src/org/broadinstitute/sting/utils/recalibration/AdaptiveContext.java diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/AdaptiveContext.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/AdaptiveContext.java deleted file mode 100644 index b8a2eaf05..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/AdaptiveContext.java +++ /dev/null @@ -1,175 +0,0 @@ -package org.broadinstitute.sting.utils.recalibration; - -import java.util.*; - -/** - * Functions for working with AdaptiveContexts - * - * User: depristo - * Date: 8/3/12 - * Time: 12:21 PM - * To change this template use File | Settings | File Templates. - */ -public class AdaptiveContext { - private AdaptiveContext() {} - - /** - * Return a freshly allocated tree filled in completely to fillDepth with - * all combinations of {A,C,G,T}^filldepth contexts. For nodes - * in the tree, they are simply copied. When the algorithm needs to - * generate new nodes (because they are missing) the subnodes inherit the - * observation and error counts of their parent. - * - * This algorithm produces data consistent with the standard output in a BQSR recal - * file for the Context covariate. - * - * Suppose you have input tree - * - * - x - * - A - * - C - * - T - * - C - * - T - * - * This will produce the following contexts with fill depth of 2 - * - * AA <- gets A info - * CA <- gets CA info - * GA <- gets A info - * TA <- get TA info - * .. for all 16 contexts - * - * @param root - * @param fillDepth - * @return - */ - public static RecalDatumNode fillToDepth(final RecalDatumNode root, - final int fillDepth, - final boolean debugWriteN) { - if ( root == null ) throw new IllegalArgumentException("root is null"); - if ( fillDepth < 0 ) throw new IllegalArgumentException("fillDepth is < 0"); - - return fillToDepthRec(root, fillDepth, 0, debugWriteN); - } - - private static RecalDatumNode fillToDepthRec(final RecalDatumNode parent, - final int fillDepth, - final int currentDepth, - final boolean debugWriteN) { - // three cases: - // We are in the tree and so just recursively build - // We have reached our depth goal, so just return the parent since we are done - // We are outside of the tree, in which case we need to pointer to our parent node so we can - // we info (N, M) and we need a running context - if ( currentDepth < fillDepth ) { - // we need to create subnodes for each base, and propogate N and M down - final RecalDatumNode newParent = new RecalDatumNode(parent.getRecalDatum()); - - for ( final String base : Arrays.asList("A", "C", "G", "T")) { - final String subContextBases = base + parent.getRecalDatum().context; - ContextDatum subContext; - Set> subContexts; - - final RecalDatumNode subNode = findSubcontext(subContextBases, parent); - if ( subNode != null ) { - // we have a subnode corresponding to the expected one, just copy and recurse - subContext = subNode.getRecalDatum(); - subContexts = subNode.getSubnodes(); - } else { - // have to create a new one - subContext = new ContextDatum(debugWriteN ? ("N" + parent.getRecalDatum().context ) : subContextBases, - parent.getRecalDatum().getNumObservations(), parent.getRecalDatum().getNumMismatches()); - subContexts = Collections.emptySet(); - } - - newParent.addSubnode( - fillToDepthRec(new RecalDatumNode(subContext, subContexts), - fillDepth, currentDepth + 1, debugWriteN)); - } - return newParent; - } else { - return parent; - } - } - - /** - * Go from a flat list of contexts to the tree implied by the contexts - * - * Implicit nodes are created as needed, and their observation and error counts are the sum of the - * all of their subnodes. - * - * Note this does not guarentee the tree is complete, as some contexts (e.g., AAT) may be missing - * from the tree because they are absent from the input list of contexts. - * - * For input AAG, AAT, AC, G would produce the following tree: - * - * - x [root] - * - A - * - A - * - T - * - G - * - C - * - G - * - * sets the fixed penalties in the resulting tree as well - * - * @param flatContexts list of flat contexts - * @return - */ - public static RecalDatumNode createTreeFromFlatContexts(final List flatContexts) { - if ( flatContexts == null || flatContexts.isEmpty() ) - throw new IllegalArgumentException("flatContexts cannot be empty or null"); - - final Queue> remaining = new LinkedList>(); - final Map> contextToNodes = new HashMap>(); - RecalDatumNode root = null; - - // initialize -- start with all of the contexts - for ( final ContextDatum cd : flatContexts ) - remaining.add(new RecalDatumNode(cd)); - - while ( remaining.peek() != null ) { - final RecalDatumNode add = remaining.poll(); - final ContextDatum cd = add.getRecalDatum(); - - final String parentContext = cd.getParentContext(); - RecalDatumNode parent = contextToNodes.get(parentContext); - if ( parent == null ) { - // haven't yet found parent, so make one, and enqueue it for processing - parent = new RecalDatumNode(new ContextDatum(parentContext, 0, 0)); - contextToNodes.put(parentContext, parent); - - if ( ! parent.getRecalDatum().isRootContext() ) - remaining.add(parent); - else - root = parent; - } - - parent.getRecalDatum().incrementNumObservations(cd.getNumObservations()); - parent.getRecalDatum().incrementNumMismatches(cd.getNumMismatches()); - parent.addSubnode(add); - } - - if ( root == null ) - throw new RuntimeException("root is unexpectedly null"); - - // set the fixed penalty everywhere in the tree, so that future modifications don't change the penalties - root.calcAndSetFixedPenalty(true); - - return root; - } - - /** - * Finds immediate subnode with contextToFind, or null if none exists - * - * @param tree whose subnodes should be searched - * @return - */ - public static RecalDatumNode findSubcontext(final String contextToFind, final RecalDatumNode tree) { - for ( final RecalDatumNode sub : tree.getSubnodes() ) - if ( sub.getRecalDatum().context.equals(contextToFind) ) - return sub; - return null; - } -} From 44f160f29fcec834507ae67b7afbfb4aa77c8b3e Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 6 Aug 2012 11:42:36 -0400 Subject: [PATCH 070/176] indelGOP and indelGCP are now advanced, not hidden arguments --- .../gatk/walkers/genotyper/UnifiedArgumentCollection.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 69f1176cc..a885d8a58 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -146,12 +146,12 @@ public class UnifiedArgumentCollection { @Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling", required = false) public double INDEL_HETEROZYGOSITY = 1.0/8000; - @Hidden - @Argument(fullName = "indelGapContinuationPenalty", shortName = "indelGCP", doc = "Indel gap continuation penalty", required = false) + @Advanced + @Argument(fullName = "indelGapContinuationPenalty", shortName = "indelGCP", doc = "Indel gap continuation penalty, as Phred-scaled probability. I.e., 30 => 10^-30/10", required = false) public byte INDEL_GAP_CONTINUATION_PENALTY = 10; - @Hidden - @Argument(fullName = "indelGapOpenPenalty", shortName = "indelGOP", doc = "Indel gap open penalty", required = false) + @Advanced + @Argument(fullName = "indelGapOpenPenalty", shortName = "indelGOP", doc = "Indel gap open penalty, as Phred-scaled probability. I.e., 30 => 10^-30/10", required = false) public byte INDEL_GAP_OPEN_PENALTY = 45; @Hidden From d85b38e4da8990925d457d2b7b613c6f2caf4015 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Mon, 6 Aug 2012 12:02:19 -0400 Subject: [PATCH 071/176] Updating HaplotypeCaller integration tests --- .../haplotypecaller/HaplotypeCallerIntegrationTest.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 17ad37deb..dd2022fa7 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -20,17 +20,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "eff4c820226abafcaa058c66585198a7"); + HCTest(CEUTRIO_BAM, "", "29ebfabcd4a42d4c5c2a576219cffb3d"); } @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "2b40b314e6e63ae165186b55b14eee41"); + HCTest(NA12878_BAM, "", "9732313b8a12faa347f6ebe96518c5df"); } @Test public void testHaplotypeCallerMultiSampleGGA() { - HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "553870cc4d7e66f30862f8ae5dee01ff"); + HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "5e1d49d4110cd96c2e25f8e1da217e9e"); } private void HCTestComplexVariants(String bam, String args, String md5) { @@ -41,7 +41,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleComplex() { - HCTestComplexVariants(CEUTRIO_BAM, "", "0936c41e8f006174f7cf27d97235133e"); + HCTestComplexVariants(CEUTRIO_BAM, "", "53df51e6071664725f6e7497f5ee5adf"); } } From 00858f16a6bd72e58c42ca2e63fd447dd5faf7ee Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 6 Aug 2012 12:57:55 -0400 Subject: [PATCH 072/176] Deleting empty unit test for AdaptiveContexts --- .../AdaptiveContextUnitTest.java | 64 ------------------- 1 file changed, 64 deletions(-) delete mode 100644 public/java/test/org/broadinstitute/sting/utils/recalibration/AdaptiveContextUnitTest.java diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/AdaptiveContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/AdaptiveContextUnitTest.java deleted file mode 100644 index c07c084b8..000000000 --- a/public/java/test/org/broadinstitute/sting/utils/recalibration/AdaptiveContextUnitTest.java +++ /dev/null @@ -1,64 +0,0 @@ -package org.broadinstitute.sting.utils.recalibration; - -import org.broadinstitute.sting.BaseTest; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -/** - * User: depristo - * Date: 8/3/12 - * Time: 12:26 PM - * To change this template use File | Settings | File Templates. - */ -public class AdaptiveContextUnitTest { - // TODO - // TODO actually need unit tests when we have validated the value of this approach - // TODO particularly before we attempt to optimize the algorithm - // TODO - - // -------------------------------------------------------------------------------- - // - // Provider - // - // -------------------------------------------------------------------------------- - - private class AdaptiveContextTestProvider extends BaseTest.TestDataProvider { - final RecalDatumNode pruned; - final RecalDatumNode full; - - private AdaptiveContextTestProvider(Class c, RecalDatumNode pruned, RecalDatumNode full) { - super(AdaptiveContextTestProvider.class); - this.pruned = pruned; - this.full = full; - } - } - - private RecalDatumNode makeTree(final String context, final int N, final int M, - final RecalDatumNode ... sub) { - final ContextDatum contextDatum = new ContextDatum(context, N, M); - final RecalDatumNode node = new RecalDatumNode(contextDatum); - for ( final RecalDatumNode sub1 : sub ) { - node.addSubnode(sub1); - } - return node; - } - - @DataProvider(name = "AdaptiveContextTestProvider") - public Object[][] makeRecalDatumTestProvider() { -// final RecalDatumNode prune1 = -// makeTree("A", 10, 1, -// makeTree("AA", 11, 2), -// makeTree("AC", 12, 3), -// makeTree("AG", 13, 4), -// makeTree("AT", 14, 5)); -// -// new AdaptiveContextTestProvider(pruned, full); - - return AdaptiveContextTestProvider.getTests(AdaptiveContextTestProvider.class); - } - - @Test(dataProvider = "AdaptiveContextTestProvider") - public void testAdaptiveContextFill(AdaptiveContextTestProvider cfg) { - - } -} From 238d55cb61935085ddc7462b385c30b8a603689c Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Mon, 6 Aug 2012 20:22:12 -0400 Subject: [PATCH 074/176] Fixes for running HaplotypeCaller with reduced reads: a) minor refactoring, pulled out code to compute mean representative count to ReadUtils, b) Don't use min representative count over kmer when constructing de Bruijn graph - this creates many paths with multiplicity=1 and makes us lose a lot of SNP's at edge of capture targets. Use mean instead --- .../haplotypecaller/LikelihoodCalculationEngine.java | 12 ++---------- .../haplotypecaller/SimpleDeBruijnAssembler.java | 6 ++++-- .../broadinstitute/sting/utils/sam/ReadUtils.java | 9 +++++++++ 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index 939b3c375..dfe6dcc3a 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -30,6 +30,7 @@ import com.google.java.contract.Requires; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -92,7 +93,7 @@ public class LikelihoodCalculationEngine { final int[][] readCounts = new int[numHaplotypes][numReads]; for( int iii = 0; iii < numReads; iii++ ) { final GATKSAMRecord read = reads.get(iii); - final int readCount = getRepresentativeReadCount(read); + final int readCount = ReadUtils.getMeanRepresentativeReadCount(read); final byte[] overallGCP = new byte[read.getReadLength()]; Arrays.fill( overallGCP, constantGCP ); // Is there a way to derive empirical estimates for this from the data? @@ -123,15 +124,6 @@ public class LikelihoodCalculationEngine { } } - private static int getRepresentativeReadCount(GATKSAMRecord read) { - if (!read.isReducedRead()) - return 1; - - // compute mean representative read counts - final byte[] counts = read.getReducedReadCounts(); - return MathUtils.sum(counts)/counts.length; - } - private static int computeFirstDifferingPosition( final byte[] b1, final byte[] b2 ) { for( int iii = 0; iii < b1.length && iii < b2.length; iii++ ){ if( b1[iii] != b2[iii] ) { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java index f3dd3babb..7319eb54d 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java @@ -198,8 +198,10 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { } int countNumber = 1; if (read.isReducedRead()) { - // compute min (?) number of reduced read counts in current kmer span - countNumber = MathUtils.arrayMin(Arrays.copyOfRange(reducedReadCounts,iii,iii+KMER_LENGTH+1)); + // compute mean number of reduced read counts in current kmer span + final byte[] counts = Arrays.copyOfRange(reducedReadCounts,iii,iii+KMER_LENGTH+1); + // precise rounding can make a difference with low consensus counts + countNumber = (int)Math.round((double)MathUtils.sum(counts)/counts.length); } if( !badKmer ) { diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 6b9ba79b4..c16470c48 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -56,6 +56,15 @@ public class ReadUtils { private static int DEFAULT_ADAPTOR_SIZE = 100; public static int CLIPPING_GOAL_NOT_REACHED = -1; + public static int getMeanRepresentativeReadCount(GATKSAMRecord read) { + if (!read.isReducedRead()) + return 1; + + // compute mean representative read counts + final byte[] counts = read.getReducedReadCounts(); + return (int)Math.round((double)MathUtils.sum(counts)/counts.length); + } + /** * A marker to tell which end of the read has been clipped */ From c66a896b8e17266390d8dd076511421214ac9250 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Mon, 6 Aug 2012 21:29:21 -0400 Subject: [PATCH 075/176] Fix UG integration test broken by new -maxAltAlleles nomenclature --- ...UnifiedGenotyperGeneralPloidyIntegrationTest.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java index 9b3103274..7294be500 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java @@ -46,33 +46,33 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest { @Test public void testBOTH_GGA_Pools() { - PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","d76e3b910259da819f1e1b2adc68ba8d"); + PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","d76e3b910259da819f1e1b2adc68ba8d"); } @Test public void testINDEL_GGA_Pools() { - PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","ffadcdaee613dab975197bed0fc78da3"); + PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","ffadcdaee613dab975197bed0fc78da3"); } @Test public void testINDEL_maxAlleles2_ploidy3_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","96087fe9240e3656cc2a4e0ff0174d5b"); + PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","96087fe9240e3656cc2a4e0ff0174d5b"); } @Test public void testINDEL_maxAlleles2_ploidy1_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","6fdae7093831ecfc82a06dd707d62fe9"); + PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","6fdae7093831ecfc82a06dd707d62fe9"); } @Test public void testMT_SNP_DISCOVERY_sp4() { - PC_MT_Test(CEUTRIO_BAM, " -maxAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","6b27634214530d379db70391a9cfc2d7"); + PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","6b27634214530d379db70391a9cfc2d7"); } @Test public void testMT_SNP_GGA_sp10() { - PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "e74d4c73ece45d7fb676b99364df4f1a"); + PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "e74d4c73ece45d7fb676b99364df4f1a"); } } From 2c76f71a03c386091fce5d477a593426bce46337 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 6 Aug 2012 22:48:04 -0400 Subject: [PATCH 076/176] Update -maxAlleles argument in integration tests --- ...iedGenotyperGeneralPloidyIntegrationTest.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java index 9b3103274..4d8c6808f 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java @@ -46,33 +46,33 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest { @Test public void testBOTH_GGA_Pools() { - PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","d76e3b910259da819f1e1b2adc68ba8d"); + PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","d76e3b910259da819f1e1b2adc68ba8d"); } @Test public void testINDEL_GGA_Pools() { - PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","ffadcdaee613dab975197bed0fc78da3"); + PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","ffadcdaee613dab975197bed0fc78da3"); } @Test - public void testINDEL_maxAlleles2_ploidy3_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","96087fe9240e3656cc2a4e0ff0174d5b"); + public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { + PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","96087fe9240e3656cc2a4e0ff0174d5b"); } @Test - public void testINDEL_maxAlleles2_ploidy1_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","6fdae7093831ecfc82a06dd707d62fe9"); + public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { + PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","6fdae7093831ecfc82a06dd707d62fe9"); } @Test public void testMT_SNP_DISCOVERY_sp4() { - PC_MT_Test(CEUTRIO_BAM, " -maxAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","6b27634214530d379db70391a9cfc2d7"); + PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","6b27634214530d379db70391a9cfc2d7"); } @Test public void testMT_SNP_GGA_sp10() { - PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "e74d4c73ece45d7fb676b99364df4f1a"); + PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "e74d4c73ece45d7fb676b99364df4f1a"); } } From 15085bf03e2c29e921df15ef017d3abb14ea5921 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Tue, 7 Aug 2012 13:58:22 -0400 Subject: [PATCH 077/176] The UnifiedGenotyper now makes use of base insertion and base deletion quality scores if they exist in the reads. --- .../gatk/walkers/indels/PairHMMIndelErrorModel.java | 4 +++- .../genotyper/UnifiedGenotyperIntegrationTest.java | 13 +++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 6bfe5702d..65c5a2fbc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -351,7 +351,9 @@ public class PairHMMIndelErrorModel { previousHaplotypeSeen = haplotypeBases.clone(); readLikelihood = pairHMM.computeReadLikelihoodGivenHaplotype(haplotypeBases, readBases, readQuals, - contextLogGapOpenProbabilities, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities, + (read.hasBaseIndelQualities() ? read.getBaseInsertionQualities() : contextLogGapOpenProbabilities), + (read.hasBaseIndelQualities() ? read.getBaseDeletionQualities() : contextLogGapOpenProbabilities), + contextLogGapContinuationProbabilities, startIndexInHaplotype, matchMetricArray, XMetricArray, YMetricArray); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index d976e3e22..7b6e1ee96 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -355,6 +355,19 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { executeTest("test GENOTYPE_GIVEN_ALLELES with no evidence in reads", spec); } + @Test + public void testBaseIndelQualityScores() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + baseCommandIndelsb37 + + " -I " + privateTestDir + "NA12878.100kb.BQSRv2.example.bam" + + " -o %s" + + " -L 20:10,000,000-10,100,000", + 1, + Arrays.asList("b3c923ed9efa04b85fc18a9b45c8d2a6")); + + executeTest(String.format("test UG with base indel quality scores"), spec); + } + // -------------------------------------------------------------------------------------------------------------- // // testing SnpEff From 982c735c7665219331c9f0b29d317dfa531d93bd Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 7 Aug 2012 08:38:16 -0400 Subject: [PATCH 078/176] VisualizeAdaptiveTree now considers only leaf nodes when computing max/min penalty --- .../utils/recalibration/RecalDatumNode.java | 35 +++++++++++++------ 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java index e792a808d..a0b3f2b0a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java @@ -169,10 +169,10 @@ public class RecalDatumNode { * The maximum penalty among all nodes * @return */ - public double maxPenalty() { - double max = getPenalty(); + public double maxPenalty(final boolean leafOnly) { + double max = ! leafOnly || isLeaf() ? getPenalty() : Double.MIN_VALUE; for ( final RecalDatumNode sub : subnodes ) - max = Math.max(max, sub.maxPenalty()); + max = Math.max(max, sub.maxPenalty(leafOnly)); return max; } @@ -180,10 +180,10 @@ public class RecalDatumNode { * The minimum penalty among all nodes * @return */ - public double minPenalty() { - double min = getPenalty(); + public double minPenalty(final boolean leafOnly) { + double min = ! leafOnly || isLeaf() ? getPenalty() : Double.MAX_VALUE; for ( final RecalDatumNode sub : subnodes ) - min = Math.min(min, sub.minPenalty()); + min = Math.min(min, sub.minPenalty(leafOnly)); return min; } @@ -251,7 +251,7 @@ public class RecalDatumNode { * @return the chi2 penalty, or 0.0 if it cannot be calculated */ private double calcPenalty() { - if ( isLeaf() ) + if ( isLeaf() || freeToMerge() ) return 0.0; else if ( subnodes.size() == 1 ) // only one value, so its free to merge away @@ -277,6 +277,22 @@ public class RecalDatumNode { } } + /** + * Is this node free to merge because its rounded Q score is the same as all nodes below + * @return + */ + private boolean freeToMerge() { + if ( isLeaf() ) // leaves are free to merge + return true; + else { + final byte myQual = getRecalDatum().getEmpiricalQualityAsByte(); + for ( final RecalDatumNode sub : subnodes ) + if ( sub.getRecalDatum().getEmpiricalQualityAsByte() != myQual ) + return false; + return true; + } + } + /** * Calculate the penalty of this interval, given the overall error rate for the interval * @@ -346,8 +362,6 @@ public class RecalDatumNode { while ( root.size() > maxElements ) { // remove the lowest penalty element, and continue root = root.removeLowestPenaltyNode(); - if ( logger.isDebugEnabled() ) - logger.debug("pruneByPenalty root size is now " + root.size() + " of max " + maxElements); } // our size is below the target, so we are good, return @@ -363,7 +377,8 @@ public class RecalDatumNode { */ private RecalDatumNode removeLowestPenaltyNode() { final Pair, Double> nodeToRemove = getMinPenaltyAboveLeafNode(); - //logger.info("Removing " + nodeToRemove.getFirst() + " with penalty " + nodeToRemove.getSecond()); + if ( logger.isDebugEnabled() ) + logger.debug("Removing " + nodeToRemove.getFirst() + " with penalty " + nodeToRemove.getSecond()); final Pair, Boolean> result = removeNode(nodeToRemove.getFirst()); From 80b94a4f9a456e8bb8c60ac06e34e8427d3ee608 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 7 Aug 2012 16:36:57 -0400 Subject: [PATCH 079/176] AdaptiveContexts implement pruning to a given chi2 p value -- Added bonferroni corrected p-value pruning, so you tell it how significant of a different you are willing to collapse in the tree, and it prunes the tree down to this maximum threshold -- Penalty is now a phred-scaled p-value not the raw chi2 value -- Split command line arguments in VisualizeContextTree into separate arguments for each type of pruning --- .../utils/recalibration/RecalDatumNode.java | 72 ++++++++++++++++--- 1 file changed, 61 insertions(+), 11 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java index a0b3f2b0a..55d3ca13f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.utils.recalibration; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; +import org.apache.commons.math.MathException; import org.apache.commons.math.stat.inference.ChiSquareTestImpl; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.collections.Pair; @@ -19,6 +20,7 @@ import java.util.Set; * @since 07/27/12 */ public class RecalDatumNode { + private final static double SMALLEST_CHI2_PVALUE = 1e-300; protected static Logger logger = Logger.getLogger(RecalDatumNode.class); /** @@ -150,8 +152,6 @@ public class RecalDatumNode { * definition have 0 penalty unless they represent a pruned tree with underlying -- but now * pruned -- subtrees * - * TODO -- can we really just add together the chi2 values? - * * @return */ public double totalPenalty() { @@ -244,11 +244,12 @@ public class RecalDatumNode { } /** - * Calculate the chi^2 penalty among subnodes of this node. The chi^2 value - * indicates the degree of independence of the implied error rates among the + * Calculate the phred-scaled p-value for a chi^2 test for independent among subnodes of this node. + * + * The chi^2 value indicates the degree of independence of the implied error rates among the * immediate subnodes * - * @return the chi2 penalty, or 0.0 if it cannot be calculated + * @return the phred-scaled p-value for chi2 penalty, or 0.0 if it cannot be calculated */ private double calcPenalty() { if ( isLeaf() || freeToMerge() ) @@ -267,13 +268,18 @@ public class RecalDatumNode { i++; } - final double chi2 = new ChiSquareTestImpl().chiSquare(counts); + try { + final double chi2PValue = new ChiSquareTestImpl().chiSquareTest(counts); + final double penalty = -10 * Math.log10(Math.max(chi2PValue, SMALLEST_CHI2_PVALUE)); - // make sure things are reasonable and fail early if not - if (Double.isInfinite(chi2) || Double.isNaN(chi2)) - throw new ReviewedStingException("chi2 value is " + chi2 + " at " + getRecalDatum()); + // make sure things are reasonable and fail early if not + if (Double.isInfinite(penalty) || Double.isNaN(penalty)) + throw new ReviewedStingException("chi2 value is " + chi2PValue + " at " + getRecalDatum()); - return chi2; + return penalty; + } catch ( MathException e ) { + throw new ReviewedStingException("Failed in calculating chi2 value", e); + } } } @@ -368,6 +374,50 @@ public class RecalDatumNode { return root; } + /** + * Return a freshly allocated tree where all mergable nodes with < maxPenalty are merged + * + * Note that nodes must have fixed penalties to this algorithm will fail. + * + * @param maxPenaltyIn the maximum penalty we are allowed to incur for a merge + * @param applyBonferroniCorrection if true, we will adjust penalty by the phred-scaled bonferroni correction + * for the size of the initial tree. That is, if there are 10 nodes in the + * tree and maxPenalty is 20 we will actually enforce 10^-2 / 10 = 10^-3 = 30 + * penalty for multiple testing + * @return + */ + public RecalDatumNode pruneToNoMoreThanPenalty(final double maxPenaltyIn, final boolean applyBonferroniCorrection) { + RecalDatumNode root = this; + + final double bonferroniCorrection = 10 * Math.log10(this.size()); + final double maxPenalty = applyBonferroniCorrection ? maxPenaltyIn + bonferroniCorrection : maxPenaltyIn; + + if ( applyBonferroniCorrection ) + logger.info(String.format("Applying Bonferroni correction for %d nodes = %.2f to initial penalty %.2f for total " + + "corrected max penalty of %.2f", this.size(), bonferroniCorrection, maxPenaltyIn, maxPenalty)); + + while ( true ) { + final Pair, Double> minPenaltyNode = root.getMinPenaltyAboveLeafNode(); + + if ( minPenaltyNode == null || minPenaltyNode.getSecond() > maxPenalty ) { + // nothing to merge, or the best candidate is above our max allowed + if ( minPenaltyNode == null ) + if ( logger.isDebugEnabled() ) logger.debug("Stopping because no candidates could be found"); + else + if ( logger.isDebugEnabled() ) logger.debug("Stopping because node " + minPenaltyNode.getFirst() + " has penalty " + minPenaltyNode.getSecond() + " > max " + maxPenalty); + break; + } else { + // remove the lowest penalty element, and continue + if ( logger.isDebugEnabled() ) logger.debug("Removing node " + minPenaltyNode.getFirst() + " with penalty " + minPenaltyNode.getSecond()); + root = root.removeLowestPenaltyNode(); + } + } + + // no more candidates exist with penalty < maxPenalty + return root; + } + + /** * Find the lowest penalty above leaf node in the tree, and return a tree without it * @@ -394,7 +444,7 @@ public class RecalDatumNode { /** * Finds in the tree the node with the lowest penalty whose subnodes are all leaves * - * @return + * @return the node and its penalty, or null if no such node exists */ private Pair, Double> getMinPenaltyAboveLeafNode() { if ( isLeaf() ) From cda8d944b741f5d9ee0cabb9b5bc3acdd4ec83aa Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 7 Aug 2012 17:22:27 -0400 Subject: [PATCH 080/176] Bugfixes for BCF with VQSR -- Old version converted doubles directly from strings. New version uses VariantContext getAttributeAsDouble() that looks at the values directly to determine how to convert from Object to Double (via Double.valueOf, (Double), or (Double)(Integer)). -- getAttributeAsDouble() is now smart in converting integers to doubles as needed -- Removed unnecessary logging info in BCF2Codec -- Added integration tests to ensure that VQSR works end-to-end with BCF2 using sites version of the file khalid sent to me -- Added vqsr.bcf_test.snps.unfiltered.bcf file for this integration test --- .../VariantDataManager.java | 2 +- .../sting/utils/codecs/bcf2/BCF2Codec.java | 1 - .../utils/variantcontext/CommonInfo.java | 1 + ...ntRecalibrationWalkersIntegrationTest.java | 59 +++++++++++++++++-- 4 files changed, 57 insertions(+), 6 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index 45fdad4f8..e88505f99 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -235,7 +235,7 @@ public class VariantDataManager { double value; try { - value = Double.parseDouble( (String)vc.getAttribute( annotationKey ) ); + value = vc.getAttributeAsDouble( annotationKey, Double.NaN ); if( Double.isInfinite(value) ) { value = Double.NaN; } if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble(); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java index 0776b3aa8..244af8517 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java @@ -167,7 +167,6 @@ public final class BCF2Codec implements FeatureCodec { // create the config offsets if ( ! header.getContigLines().isEmpty() ) { - logger.info("Found contig lines in BCF2 file, using those"); contigNames.clear(); for ( final VCFContigHeaderLine contig : header.getContigLines()) { if ( contig.getID() == null || contig.getID().equals("") ) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/CommonInfo.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/CommonInfo.java index fb0d7140d..127f91677 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/CommonInfo.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/CommonInfo.java @@ -216,6 +216,7 @@ final class CommonInfo { Object x = getAttribute(key); if ( x == null ) return defaultValue; if ( x instanceof Double ) return (Double)x; + if ( x instanceof Integer ) return (Integer)x; return Double.valueOf((String)x); // throws an exception if this isn't a string } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java index 74d071a90..d1ecbb0bf 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java @@ -13,7 +13,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { String recalMD5; String cutVCFMD5; public VRTest(String inVCF, String tranchesMD5, String recalMD5, String cutVCFMD5) { - this.inVCF = validationDataLocation + inVCF; + this.inVCF = inVCF; this.tranchesMD5 = tranchesMD5; this.recalMD5 = recalMD5; this.cutVCFMD5 = cutVCFMD5; @@ -25,7 +25,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { } } - VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf", + VRTest lowPass = new VRTest(validationDataLocation + "phase1.projectConsensus.chr20.raw.snps.vcf", "f360ce3eb2b0b887301be917a9843e2b", // tranches "287fea5ea066bf3fdd71f5ce9b58eab3", // recal file "356b9570817b9389da71fbe991d8b2f5"); // cut VCF @@ -74,14 +74,65 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { executeTest("testApplyRecalibration-"+params.inVCF, spec); } + VRTest bcfTest = new VRTest(privateTestDir + "vqsr.bcf_test.snps.unfiltered.bcf", + "a8ce3cd3dccafdf7d580bcce7d660a9a", // tranches + "1cdf8c9ee77d91d1ba7f002573108bad", // recal file + "62fda105e14b619a1c263855cf56af1d"); // cut VCF + + @DataProvider(name = "VRBCFTest") + public Object[][] createVRBCFTest() { + return new Object[][]{ {bcfTest} }; + //return new Object[][]{ {yriTrio}, {lowPass} }; // Add hg19 chr20 trio calls here + } + + @Test(dataProvider = "VRBCFTest") + public void testVariantRecalibratorWithBCF(VRTest params) { + //System.out.printf("PARAMS FOR %s is %s%n", vcf, clusterFile); + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + b37KGReference + + " -resource:known=true,prior=10.0 " + GATKDataLocation + "dbsnp_132_b37.leftAligned.vcf" + + " -resource:truth=true,training=true,prior=15.0 " + comparisonDataLocation + "Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf" + + " -resource:training=true,truth=true,prior=12.0 " + comparisonDataLocation + "Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf" + + " -T VariantRecalibrator" + + " -input " + params.inVCF + + " -L 20:10,000,000-20,000,000" + + " --no_cmdline_in_header" + + " -an AC " + // integer value + " -an QD -an ReadPosRankSum -an FS -an InbreedingCoeff " + // floats value + " -mG 2 "+ + " -recalFile %s" + + " -tranchesFile %s", + 2, + Arrays.asList("bcf", "txt"), + Arrays.asList(params.recalMD5, params.tranchesMD5)); + executeTest("testVariantRecalibrator-"+params.inVCF, spec).getFirst(); + } + + @Test(dataProvider = "VRBCFTest", dependsOnMethods="testVariantRecalibratorWithBCF") + public void testApplyRecalibrationWithBCF(VRTest params) { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + b37KGReference + + " -T ApplyRecalibration" + + " -L 20:10,000,000-20,000,000" + + " --no_cmdline_in_header" + + " -input " + params.inVCF + + " -U LENIENT_VCF_PROCESSING -o %s" + + " -tranchesFile " + getMd5DB().getMD5FilePath(params.tranchesMD5, null) + + " -recalFile " + getMd5DB().getMD5FilePath(params.recalMD5, null), + Arrays.asList(params.cutVCFMD5)); + spec.disableShadowBCF(); + executeTest("testApplyRecalibration-"+params.inVCF, spec); + } + + VRTest indelUnfiltered = new VRTest( - "combined.phase1.chr20.raw.indels.unfiltered.sites.vcf", // all FILTERs as . + validationDataLocation + "combined.phase1.chr20.raw.indels.unfiltered.sites.vcf", // all FILTERs as . "b7589cd098dc153ec64c02dcff2838e4", // tranches "a04a9001f62eff43d363f4d63769f3ee", // recal file "64f576881e21323dd4078262604717a2"); // cut VCF VRTest indelFiltered = new VRTest( - "combined.phase1.chr20.raw.indels.filtered.sites.vcf", // all FILTERs as PASS + validationDataLocation + "combined.phase1.chr20.raw.indels.filtered.sites.vcf", // all FILTERs as PASS "b7589cd098dc153ec64c02dcff2838e4", // tranches "a04a9001f62eff43d363f4d63769f3ee", // recal file "af22c55d91394c56a222fd40d6d54781"); // cut VCF From a7811d673f3abf1ef73ff7e6c4aadbd58d0221c9 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Wed, 8 Aug 2012 09:29:54 -0400 Subject: [PATCH 081/176] Update URL for phone home / GATK key documentation output by the GATK upon error --- .../sting/gatk/CommandLineExecutable.java | 4 ++-- .../gatk/arguments/GATKArgumentCollection.java | 4 ++-- .../sting/gatk/phonehome/GATKRunReport.java | 1 + .../sting/utils/exceptions/UserException.java | 14 +++++++------- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index c6bb4a27c..0286cdc52 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -130,8 +130,8 @@ public abstract class CommandLineExecutable extends CommandLineProgram { getArgumentCollection().phoneHomeType == GATKRunReport.PhoneHomeOption.STDOUT ) { if ( getArgumentCollection().gatkKeyFile == null ) { throw new UserException("Running with the -et NO_ET or -et STDOUT option requires a GATK Key file. " + - "Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home " + - "for more information and instructions on how to obtain a key."); + "Please see " + GATKRunReport.PHONE_HOME_DOCS_URL + + " for more information and instructions on how to obtain a key."); } else { PublicKey gatkPublicKey = CryptUtils.loadGATKDistributedPublicKey(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 972116952..1e6920b82 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -66,10 +66,10 @@ public class GATKArgumentCollection { @Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false) public Integer readBufferSize = null; - @Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false) + @Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see " + GATKRunReport.PHONE_HOME_DOCS_URL + " for details.", required = false) public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD; - @Argument(fullName = "gatk_key", shortName = "K", doc="GATK Key file. Required if running with -et NO_ET. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false) + @Argument(fullName = "gatk_key", shortName = "K", doc="GATK Key file. Required if running with -et NO_ET. Please see " + GATKRunReport.PHONE_HOME_DOCS_URL + " for details.", required = false) public File gatkKeyFile = null; @Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually", required = false) diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java index a42c73212..4cf5046a2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java @@ -88,6 +88,7 @@ public class GATKRunReport { // number of milliseconds before the S3 put operation is timed-out: private static final long S3PutTimeOut = 10 * 1000; + public static final String PHONE_HOME_DOCS_URL = "http://gatkforums.broadinstitute.org/discussion/1250/what-is-phone-home-and-how-does-it-affect-me#latest"; /** * our log diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java index 52da31362..bda03f675 100755 --- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java +++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.exceptions; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMSequenceDictionary; +import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -351,8 +352,8 @@ public class UserException extends ReviewedStingException { public static class UnreadableKeyException extends UserException { public UnreadableKeyException ( File f, Exception e ) { super(String.format("Key file %s cannot be read (possibly the key file is corrupt?). Error was: %s. " + - "Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for help.", - f.getAbsolutePath(), getMessage(e))); + "Please see %s for help.", + f.getAbsolutePath(), getMessage(e), GATKRunReport.PHONE_HOME_DOCS_URL)); } public UnreadableKeyException ( String message, Exception e ) { @@ -361,8 +362,8 @@ public class UserException extends ReviewedStingException { public UnreadableKeyException ( String message ) { super(String.format("Key file cannot be read (possibly the key file is corrupt?): %s. " + - "Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for help.", - message)); + "Please see %s for help.", + message, GATKRunReport.PHONE_HOME_DOCS_URL)); } } @@ -370,9 +371,8 @@ public class UserException extends ReviewedStingException { public KeySignatureVerificationException ( File f ) { super(String.format("The signature in key file %s failed cryptographic verification. " + "If this key was valid in the past, it's likely been revoked. " + - "Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home " + - "for help.", - f.getAbsolutePath())); + "Please see %s for help.", + f.getAbsolutePath(), GATKRunReport.PHONE_HOME_DOCS_URL)); } } } From 4b2e3cec0b8f8aeb9b22b487a8f81e0db4bd6997 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 8 Aug 2012 14:29:41 -0400 Subject: [PATCH 082/176] Quick pass of FindBugs 'inefficient use of keySet iterator instead of entrySet iterator' fixes for core tools. --- .../walkers/haplotypecaller/HaplotypeCaller.java | 6 +++--- .../LikelihoodCalculationEngine.java | 14 +++++++------- .../walkers/annotator/BaseQualityRankSumTest.java | 8 ++++---- .../sting/gatk/walkers/annotator/FisherStrand.java | 12 ++++++------ .../gatk/walkers/annotator/HaplotypeScore.java | 4 ++-- .../annotator/MappingQualityRankSumTest.java | 8 ++++---- .../gatk/walkers/annotator/ReadPosRankSumTest.java | 9 ++++----- .../walkers/genotyper/ConsensusAlleleCounter.java | 4 ++-- .../walkers/indels/HaplotypeIndelErrorModel.java | 5 +++-- .../gatk/walkers/phasing/PhaseByTransmission.java | 8 ++++---- .../varianteval/VariantEvalReportWriter.java | 8 ++++---- .../sting/utils/interval/IntervalUtils.java | 4 ++-- 12 files changed, 45 insertions(+), 45 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index df786bc20..c1f190080 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -332,11 +332,11 @@ public class HaplotypeCaller extends ActiveRegionWalker implem final Map splitContexts = AlignmentContextUtils.splitContextBySampleName(context); final GenotypesContext genotypes = GenotypesContext.create(splitContexts.keySet().size()); final MathUtils.RunningAverage averageHQSoftClips = new MathUtils.RunningAverage(); - for( final String sample : splitContexts.keySet() ) { + for( Map.Entry sample : splitContexts.entrySet() ) { final double[] genotypeLikelihoods = new double[3]; // ref versus non-ref (any event) Arrays.fill(genotypeLikelihoods, 0.0); - for( final PileupElement p : splitContexts.get(sample).getBasePileup() ) { + for( final PileupElement p : sample.getValue().getBasePileup() ) { final byte qual = ( USE_EXPANDED_TRIGGER_SET ? ( p.isNextToSoftClip() || p.isBeforeInsertion() || p.isAfterInsertion() ? ( p.getQual() > QualityUtils.MIN_USABLE_Q_SCORE ? p.getQual() : (byte) 20 ) : p.getQual() ) : p.getQual() ); @@ -362,7 +362,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem genotypeLikelihoods[BB] += p.getRepresentativeCount() * QualityUtils.qualToErrorProbLog10(qual) + LOG_ONE_THIRD; } } - genotypes.add( new GenotypeBuilder(sample).alleles(noCall).PL(genotypeLikelihoods).make() ); + genotypes.add( new GenotypeBuilder(sample.getKey()).alleles(noCall).PL(genotypeLikelihoods).make() ); } final ArrayList alleles = new ArrayList(); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index 0301ebda7..fabf5633f 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -53,8 +53,8 @@ public class LikelihoodCalculationEngine { public void computeReadLikelihoods( final ArrayList haplotypes, final HashMap> perSampleReadList ) { int X_METRIC_LENGTH = 0; - for( final String sample : perSampleReadList.keySet() ) { - for( final GATKSAMRecord read : perSampleReadList.get(sample) ) { + for( final Map.Entry> sample : perSampleReadList.entrySet() ) { + for( final GATKSAMRecord read : sample.getValue() ) { final int readLength = read.getReadLength(); if( readLength > X_METRIC_LENGTH ) { X_METRIC_LENGTH = readLength; } } @@ -326,9 +326,9 @@ public class LikelihoodCalculationEngine { public static Map>> partitionReadsBasedOnLikelihoods( final GenomeLocParser parser, final HashMap> perSampleReadList, final HashMap> perSampleFilteredReadList, final Pair>> call) { final Map>> returnMap = new HashMap>>(); final GenomeLoc callLoc = parser.createGenomeLoc(call.getFirst()); - for( final String sample : perSampleReadList.keySet() ) { + for( final Map.Entry> sample : perSampleReadList.entrySet() ) { final Map> alleleReadMap = new HashMap>(); - final ArrayList readsForThisSample = perSampleReadList.get(sample); + final ArrayList readsForThisSample = sample.getValue(); for( int iii = 0; iii < readsForThisSample.size(); iii++ ) { final GATKSAMRecord read = readsForThisSample.get(iii); // BUGBUG: assumes read order in this list and haplotype likelihood list are the same! // only count the read if it overlaps the event, otherwise it is not added to the output read list at all @@ -338,7 +338,7 @@ public class LikelihoodCalculationEngine { for( final Allele a : call.getFirst().getAlleles() ) { // find the allele with the highest haplotype likelihood double maxLikelihood = Double.NEGATIVE_INFINITY; for( final Haplotype h : call.getSecond().get(a) ) { // use the max likelihood from all the haplotypes which mapped to this allele (achieved via the haplotype mapper object) - final double likelihood = h.getReadLikelihoods(sample)[iii]; + final double likelihood = h.getReadLikelihoods(sample.getKey())[iii]; if( likelihood > maxLikelihood ) { maxLikelihood = likelihood; } @@ -373,13 +373,13 @@ public class LikelihoodCalculationEngine { readList = new ArrayList(); alleleReadMap.put(Allele.NO_CALL, readList); } - for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample) ) { + for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample.getKey()) ) { // only count the read if it overlaps the event, otherwise it is not added to the output read list at all if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) { readList.add(read); } } - returnMap.put(sample, alleleReadMap); + returnMap.put(sample.getKey(), alleleReadMap); } return returnMap; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java index 0b919da18..bd884892c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java @@ -65,12 +65,12 @@ public class BaseQualityRankSumTest extends RankSumTest implements StandardAnnot // by design, first element in LinkedHashMap was ref allele double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY; - for (Allele a : el.keySet()) { + for (Map.Entry entry : el.entrySet()) { - if (a.isReference()) - refLikelihood =el.get(a); + if (entry.getKey().isReference()) + refLikelihood = entry.getValue(); else { - double like = el.get(a); + double like = entry.getValue(); if (like >= altLikelihood) altLikelihood = like; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index 4669cfef8..131670599 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -291,8 +291,8 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat int[][] table = new int[2][2]; - for ( String sample : stratifiedContexts.keySet() ) { - final AlignmentContext context = stratifiedContexts.get(sample); + for ( Map.Entry sample : stratifiedContexts.entrySet() ) { + final AlignmentContext context = sample.getValue(); if ( context == null ) continue; @@ -313,12 +313,12 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY; - for (Allele a : el.keySet()) { + for (Map.Entry entry : el.entrySet()) { - if (a.isReference()) - refLikelihood =el.get(a); + if (entry.getKey().isReference()) + refLikelihood = entry.getValue(); else { - double like = el.get(a); + double like = entry.getValue(); if (like >= altLikelihood) altLikelihood = like; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java index 45444e05d..ff71c4c73 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java @@ -362,8 +362,8 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot // Score all the reads in the pileup, even the filtered ones final double[] scores = new double[el.size()]; int i = 0; - for (Allele a : el.keySet()) { - scores[i++] = -el.get(a); + for (Map.Entry a : el.entrySet()) { + scores[i++] = -a.getValue(); if (DEBUG) { System.out.printf(" vs. haplotype %d = %f%n", i - 1, scores[i - 1]); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java index c7fb7ecba..31067e386 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java @@ -61,12 +61,12 @@ public class MappingQualityRankSumTest extends RankSumTest implements StandardAn // by design, first element in LinkedHashMap was ref allele double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY; - for (Allele a : el.keySet()) { + for (Map.Entry a : el.entrySet()) { - if (a.isReference()) - refLikelihood =el.get(a); + if (a.getKey().isReference()) + refLikelihood = a.getValue(); else { - double like = el.get(a); + double like = a.getValue(); if (like >= altLikelihood) altLikelihood = like; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java index 630344992..3456041c7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java @@ -87,11 +87,11 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio LinkedHashMap el = indelLikelihoodMap.get(p); // retrieve likelihood information corresponding to this read double refLikelihood = 0.0, altLikelihood = Double.NEGATIVE_INFINITY; // by design, first element in LinkedHashMap was ref allele - for (Allele a : el.keySet()) { - if (a.isReference()) - refLikelihood = el.get(a); + for (Map.Entry a : el.entrySet()) { + if (a.getKey().isReference()) + refLikelihood = a.getValue(); else { - double like = el.get(a); + double like = a.getValue(); if (like >= altLikelihood) altLikelihood = like; } @@ -100,7 +100,6 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio int readPos = getOffsetFromClippedReadStart(p.getRead(), p.getOffset()); final int numAlignedBases = getNumAlignedBases(p.getRead()); - int rp = readPos; if (readPos > numAlignedBases / 2) { readPos = numAlignedBases - (readPos + 1); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java index 869e52216..7dcc95361 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java @@ -148,8 +148,8 @@ public class ConsensusAlleleCounter { boolean foundKey = false; // copy of hashmap into temp arrayList ArrayList> cList = new ArrayList>(); - for (String s : consensusIndelStrings.keySet()) { - cList.add(new Pair(s,consensusIndelStrings.get(s))); + for (Map.Entry s : consensusIndelStrings.entrySet()) { + cList.add(new Pair(s.getKey(), s.getValue())); } if (read.getAlignmentEnd() == loc.getStart()) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java index 26023bd2f..3a10620aa 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java @@ -35,6 +35,7 @@ import org.broadinstitute.sting.utils.variantcontext.Allele; import java.util.Arrays; import java.util.HashMap; +import java.util.Map; public class HaplotypeIndelErrorModel { @@ -427,8 +428,8 @@ public class HaplotypeIndelErrorModel { // for each read/haplotype combination, compute likelihoods, ie -10*log10(Pr(R | Hi)) // = sum_j(-10*log10(Pr(R_j | Hi) since reads are assumed to be independent int j=0; - for (Allele a: haplotypesInVC.keySet()) { - readLikelihoods[i][j]= computeReadLikelihoodGivenHaplotype(haplotypesInVC.get(a), read); + for (Map.Entry a: haplotypesInVC.entrySet()) { + readLikelihoods[i][j]= computeReadLikelihoodGivenHaplotype(a.getValue(), read); if (DEBUG) { System.out.print(read.getReadName()+" "); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java index 3cf1d485e..bbd4bf92f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java @@ -426,10 +426,10 @@ public class PhaseByTransmission extends RodWalker, HashMa Map> families = this.getSampleDB().getFamilies(); Set family; ArrayList parents; - for(String familyID : families.keySet()){ - family = families.get(familyID); + for(Map.Entry> familyEntry : families.entrySet()){ + family = familyEntry.getValue(); if(family.size()<2 || family.size()>3){ - logger.info(String.format("Caution: Family %s has %d members; At the moment Phase By Transmission only supports trios and parent/child pairs. Family skipped.",familyID,family.size())); + logger.info(String.format("Caution: Family %s has %d members; At the moment Phase By Transmission only supports trios and parent/child pairs. Family skipped.",familyEntry.getKey(),family.size())); } else{ for(Sample familyMember : family){ @@ -438,7 +438,7 @@ public class PhaseByTransmission extends RodWalker, HashMa if(family.containsAll(parents)) this.trios.add(familyMember); else - logger.info(String.format("Caution: Family %s skipped as it is not a trio nor a parent/child pair; At the moment Phase By Transmission only supports trios and parent/child pairs. Family skipped.",familyID)); + logger.info(String.format("Caution: Family %s skipped as it is not a trio nor a parent/child pair; At the moment Phase By Transmission only supports trios and parent/child pairs. Family skipped.",familyEntry.getKey())); break; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java index 2a759f2f5..97814075c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java @@ -183,13 +183,13 @@ public class VariantEvalReportWriter { throw new ReviewedStingException("Datamap is empty for analysis " + scanner.getAnalysis()); // add DataPoint's for each field marked as such - for (final Field field : datamap.keySet()) { + for (final Map.Entry field : datamap.entrySet()) { try { - field.setAccessible(true); + field.getKey().setAccessible(true); // this is an atomic value, add a column for it - final String format = datamap.get(field).format(); - table.addColumn(field.getName(), format); + final String format = field.getValue().format(); + table.addColumn(field.getKey().getName(), format); } catch (SecurityException e) { throw new StingException("SecurityException: " + e); } diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java index 6ee4af288..85e9f362d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java @@ -681,8 +681,8 @@ public class IntervalUtils { LinkedHashMap> locsByContig = splitByContig(sorted); List expanded = new ArrayList(); - for (String contig: locsByContig.keySet()) { - List contigLocs = locsByContig.get(contig); + for (Map.Entry> contig: locsByContig.entrySet()) { + List contigLocs = contig.getValue(); int contigLocsSize = contigLocs.size(); GenomeLoc startLoc, stopLoc; From a0196c9f5bc5f79ddd83570692c6f60b22fd8445 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 8 Aug 2012 14:34:16 -0400 Subject: [PATCH 083/176] Quick pass of FindBugs 'method invokes inefficient Number constructor' fixes. --- .../sting/gatk/refdata/VariantContextAdaptors.java | 1 - .../sting/gatk/walkers/indels/SomaticIndelDetector.java | 2 +- .../org/broadinstitute/sting/utils/SequenceDictionaryUtils.java | 2 +- .../broadinstitute/sting/utils/recalibration/QualQuantizer.java | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index 1b75a2c70..2b46414a8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -309,7 +309,6 @@ public class VariantContextAdaptors { int index = hapmap.getStart() - ref.getWindow().getStart(); if ( index < 0 ) return null; // we weren't given enough reference context to create the VariantContext - Byte refBaseForIndel = new Byte(ref.getBases()[index]); HashSet alleles = new HashSet(); Allele refSNPAllele = Allele.create(ref.getBase(), true); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java index 84a65b9b2..ba16fd709 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java @@ -1304,7 +1304,7 @@ public class SomaticIndelDetector extends ReadWalker { @Override public Integer reduceInit() { - return new Integer(0); + return 0; } diff --git a/public/java/src/org/broadinstitute/sting/utils/SequenceDictionaryUtils.java b/public/java/src/org/broadinstitute/sting/utils/SequenceDictionaryUtils.java index d7a390692..9e10fd670 100755 --- a/public/java/src/org/broadinstitute/sting/utils/SequenceDictionaryUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/SequenceDictionaryUtils.java @@ -329,7 +329,7 @@ public class SequenceDictionaryUtils { */ private static class CompareSequenceRecordsByIndex implements Comparator { public int compare(SAMSequenceRecord x, SAMSequenceRecord y) { - return new Integer(x.getSequenceIndex()).compareTo(y.getSequenceIndex()); + return Integer.valueOf(x.getSequenceIndex()).compareTo(y.getSequenceIndex()); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/QualQuantizer.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/QualQuantizer.java index 62edd5fac..a5a3104a0 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/QualQuantizer.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/QualQuantizer.java @@ -223,7 +223,7 @@ public class QualQuantizer { @Override public int compareTo(final QualInterval qualInterval) { - return new Integer(this.qStart).compareTo(qualInterval.qStart); + return Integer.valueOf(this.qStart).compareTo(qualInterval.qStart); } /** From 4c84cc9486db2139e052b4b58cb8268e93df5099 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 8 Aug 2012 14:42:06 -0400 Subject: [PATCH 084/176] Quick pass of FindBugs 'should be static inner class' fixes. --- .../src/org/broadinstitute/sting/gatk/walkers/ClipReads.java | 2 +- .../sting/gatk/walkers/annotator/HaplotypeScore.java | 2 +- .../genotyper/SNPGenotypeLikelihoodsCalculationModel.java | 2 +- .../sting/gatk/walkers/indels/ConstrainedMateFixingManager.java | 2 +- .../sting/gatk/walkers/indels/RealignerTargetCreator.java | 2 +- .../sting/gatk/walkers/variantutils/SelectVariants.java | 2 +- .../src/org/broadinstitute/sting/utils/clipping/ClippingOp.java | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReads.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReads.java index beafd0870..4eaa16692 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReads.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReads.java @@ -574,7 +574,7 @@ public class ClipReads extends ReadWalker clipSeqs) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java index ff71c4c73..c6d8883c5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java @@ -103,7 +103,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot return map; } - private class HaplotypeComparator implements Comparator { + private static class HaplotypeComparator implements Comparator { public int compare(Haplotype a, Haplotype b) { if (a.getQualitySum() < b.getQualitySum()) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index c767cf783..07d5d2f2d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -208,7 +208,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC return new ReadBackedPileupImpl( pileup.getLocation(), BAQedElements ); } - public class BAQedPileupElement extends PileupElement { + public static class BAQedPileupElement extends PileupElement { public BAQedPileupElement( final PileupElement PE ) { super(PE.getRead(), PE.getOffset(), PE.isDeletion(), PE.isBeforeDeletedBase(), PE.isAfterDeletedBase(), PE.isBeforeInsertion(), PE.isAfterInsertion(), PE.isNextToSoftClip()); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java index 3dd51fa7d..4feba35af 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java @@ -124,7 +124,7 @@ public class ConstrainedMateFixingManager { return first; } - private class SAMRecordHashObject { + private static class SAMRecordHashObject { public SAMRecord record; public boolean wasModified; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java index 02e4d414d..fc6df6902 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java @@ -332,7 +332,7 @@ public class RealignerTargetCreator extends RodWalker intervals = new TreeSet(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index e4831eaf2..cf528de09 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -329,7 +329,7 @@ public class SelectVariants extends RodWalker implements TreeR /* Private class used to store the intermediate variants in the integer random selection process */ - private class RandomVariantStructure { + private static class RandomVariantStructure { private VariantContext vc; RandomVariantStructure(VariantContext vcP) { diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java index 554188bc1..08c50b982 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java @@ -538,7 +538,7 @@ public class ClippingOp { return 0; } - private class CigarShift { + private static class CigarShift { private Cigar cigar; private int shiftFromStart; private int shiftFromEnd; From 0a2a646a5207d7c9b0e70270e62a1f8942356234 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 8 Aug 2012 14:56:27 -0400 Subject: [PATCH 086/176] Other random FindBugs fixes --- .../gatk/walkers/genotyper/UnifiedArgumentCollection.java | 1 - .../gatk/walkers/variantutils/FilterLiftedVariants.java | 8 +++----- .../broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java | 2 +- .../sting/utils/recalibration/RecalDatumNode.java | 5 +++-- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index a885d8a58..e755a1e36 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -263,7 +263,6 @@ public class UnifiedArgumentCollection { uac.referenceSampleName = referenceSampleName; uac.samplePloidy = samplePloidy; uac.maxQualityScore = minQualityScore; - uac.maxQualityScore = maxQualityScore; uac.phredScaledPrior = phredScaledPrior; uac.minPower = minPower; uac.minReferenceDepth = minReferenceDepth; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java index d223adefb..f89bcb2a7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java @@ -34,15 +34,13 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Arrays; -import java.util.Collection; -import java.util.Map; -import java.util.Set; +import java.util.*; /** * Filters a lifted-over VCF file for ref bases that have been changed. @@ -66,7 +64,7 @@ public class FilterLiftedVariants extends RodWalker { Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName)); Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName)); - final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey(trackName) ? vcfHeaders.get(trackName).getMetaDataInSortedOrder() : null, samples); + final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey(trackName) ? vcfHeaders.get(trackName).getMetaDataInSortedOrder() : Collections.emptySet(), samples); writer.writeHeader(vcfHeader); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java index 244af8517..3b9e86c8d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java @@ -492,6 +492,6 @@ public final class BCF2Codec implements FeatureCodec { } private final void error(final String message) throws RuntimeException { - throw new UserException.MalformedBCF2(String.format("At record %d with position %d:", recordNo, pos, message)); + throw new UserException.MalformedBCF2(String.format("%s, at record %d with position %d:", message, recordNo, pos)); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java index 55d3ca13f..102aa4433 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java @@ -401,10 +401,11 @@ public class RecalDatumNode { if ( minPenaltyNode == null || minPenaltyNode.getSecond() > maxPenalty ) { // nothing to merge, or the best candidate is above our max allowed - if ( minPenaltyNode == null ) + if ( minPenaltyNode == null ) { if ( logger.isDebugEnabled() ) logger.debug("Stopping because no candidates could be found"); - else + } else { if ( logger.isDebugEnabled() ) logger.debug("Stopping because node " + minPenaltyNode.getFirst() + " has penalty " + minPenaltyNode.getSecond() + " > max " + maxPenalty); + } break; } else { // remove the lowest penalty element, and continue From 1223d7754652f2f93ee9f140568a4e9cad28e78b Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 8 Aug 2012 15:13:20 -0400 Subject: [PATCH 087/176] Removing argument from HaplotypeCaller that was made unneccesary by recent improvements to triggering around large events --- .../haplotypecaller/HaplotypeCaller.java | 35 +++++++------------ .../gatk/walkers/bqsr/BaseRecalibrator.java | 4 +-- 2 files changed, 14 insertions(+), 25 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index c1f190080..559347843 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -128,9 +128,11 @@ public class HaplotypeCaller extends ActiveRegionWalker implem @Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = false) protected int MIN_PRUNE_FACTOR = 1; + @Advanced @Argument(fullName="genotypeFullActiveRegion", shortName="genotypeFullActiveRegion", doc = "If specified, alternate alleles are considered to be the full active region for the purposes of genotyping", required = false) protected boolean GENOTYPE_FULL_ACTIVE_REGION = false; + @Advanced @Argument(fullName="fullHaplotype", shortName="fullHaplotype", doc = "If specified, output the full haplotype sequence instead of converting to individual variants w.r.t. the reference", required = false) protected boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE = false; @@ -141,9 +143,6 @@ public class HaplotypeCaller extends ActiveRegionWalker implem @Argument(fullName="downsampleRegion", shortName="dr", doc="coverage, per-sample, to downsample each active region to", required = false) protected int DOWNSAMPLE_PER_SAMPLE_PER_REGION = 1000; - @Argument(fullName="useExpandedTriggerSet", shortName="expandedTriggers", doc = "If specified, use additional, experimental triggers designed to capture larger indels but which may lead to an increase in the false positive rate", required=false) - protected boolean USE_EXPANDED_TRIGGER_SET = false; - @Argument(fullName="useAllelesTrigger", shortName="allelesTrigger", doc = "If specified, use additional trigger on variants found in an external alleles file", required=false) protected boolean USE_ALLELES_TRIGGER = false; @@ -245,8 +244,8 @@ public class HaplotypeCaller extends ActiveRegionWalker implem UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC.clone(), logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling - UAC.STANDARD_CONFIDENCE_FOR_CALLING = (USE_EXPANDED_TRIGGER_SET ? 0.3 : Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING) ); // low values used for isActive determination only, default/user-specified values used for actual calling - UAC.STANDARD_CONFIDENCE_FOR_EMITTING = (USE_EXPANDED_TRIGGER_SET ? 0.3 : Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING) ); // low values used for isActive determination only, default/user-specified values used for actual calling + UAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING); + UAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING); UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); // initialize the output VCF header @@ -332,29 +331,19 @@ public class HaplotypeCaller extends ActiveRegionWalker implem final Map splitContexts = AlignmentContextUtils.splitContextBySampleName(context); final GenotypesContext genotypes = GenotypesContext.create(splitContexts.keySet().size()); final MathUtils.RunningAverage averageHQSoftClips = new MathUtils.RunningAverage(); - for( Map.Entry sample : splitContexts.entrySet() ) { + for( final Map.Entry sample : splitContexts.entrySet() ) { final double[] genotypeLikelihoods = new double[3]; // ref versus non-ref (any event) Arrays.fill(genotypeLikelihoods, 0.0); for( final PileupElement p : sample.getValue().getBasePileup() ) { - final byte qual = ( USE_EXPANDED_TRIGGER_SET ? - ( p.isNextToSoftClip() || p.isBeforeInsertion() || p.isAfterInsertion() ? ( p.getQual() > QualityUtils.MIN_USABLE_Q_SCORE ? p.getQual() : (byte) 20 ) : p.getQual() ) - : p.getQual() ); - if( p.isDeletion() || qual > (USE_EXPANDED_TRIGGER_SET ? QualityUtils.MIN_USABLE_Q_SCORE : (byte) 18) ) { + final byte qual = p.getQual(); + if( p.isDeletion() || qual > (byte) 18) { int AA = 0; final int AB = 1; int BB = 2; - if( USE_EXPANDED_TRIGGER_SET ) { - if( p.getBase() != ref.getBase() || p.isDeletion() || p.isBeforeDeletedBase() || p.isAfterDeletedBase() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip() || - (!p.getRead().getNGSPlatform().equals(NGSPlatform.SOLID) && ((p.getRead().getReadPairedFlag() && p.getRead().getMateUnmappedFlag()) || BadMateFilter.hasBadMate(p.getRead()))) ) { - AA = 2; - BB = 0; - } - } else { - if( p.getBase() != ref.getBase() || p.isDeletion() || p.isBeforeDeletedBase() || p.isAfterDeletedBase() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip() ) { - AA = 2; - BB = 0; - if( p.isNextToSoftClip() ) { - averageHQSoftClips.add(AlignmentUtils.calcNumHighQualitySoftClips(p.getRead(), (byte) 28)); - } + if( p.getBase() != ref.getBase() || p.isDeletion() || p.isBeforeDeletedBase() || p.isAfterDeletedBase() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip() ) { + AA = 2; + BB = 0; + if( p.isNextToSoftClip() ) { + averageHQSoftClips.add(AlignmentUtils.calcNumHighQualitySoftClips(p.getRead(), (byte) 28)); } } genotypeLikelihoods[AA] += p.getRepresentativeCount() * QualityUtils.qualToProbLog10(qual); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index 512434e6d..f69a02002 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -246,9 +246,9 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed } /** - * Initialize the reduce step by creating a PrintStream from the filename specified as an argument to the walker. + * Initialize the reduce step by returning 0L * - * @return returns A PrintStream created from the -recalFile filename argument specified to the walker + * @return returns 0L */ public Long reduceInit() { return 0L; From 35cec8530c302d0ad1b8e13e569041c9df899b71 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 8 Aug 2012 21:44:24 -0400 Subject: [PATCH 088/176] Make coverage threshold in FindCoveredIntervals a command-line argument --- .../walkers/diagnostics/targets/FindCoveredIntervals.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java index 0c856c6df..23e4d5ae0 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java @@ -24,6 +24,7 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.targets; +import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -46,6 +47,9 @@ public class FindCoveredIntervals extends ActiveRegionWalker { @Output(required = true) private PrintStream out; + @Argument(fullName = "coverage_threshold", shortName = "cov", doc = "The minimum allowable coverage to be considered covered", required = false) + private int coverageThreshold = 20; + @Override // Look to see if the region has sufficient coverage public ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) { @@ -53,8 +57,7 @@ public class FindCoveredIntervals extends ActiveRegionWalker { int depth = ThresHolder.DEFAULTS.getFilteredCoverage(context.getBasePileup()); // note the linear probability scale - int coverageThreshold = 20; - return new ActivityProfileResult(Math.min((double) depth / coverageThreshold, 1)); + return new ActivityProfileResult(Math.min(depth / coverageThreshold, 1)); } From 71ee8d87b3405e80f69da3344dbf8bf0c4a3198e Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Thu, 9 Aug 2012 09:58:20 -0400 Subject: [PATCH 089/176] Rename per-sample ML allelic fractions and counts so that they don't have the same name as the per-site INFO fields, and clarify wording in VCF header --- .../GeneralPloidyExactAFCalculationModel.java | 4 ++-- ...UnifiedGenotyperGeneralPloidyIntegrationTest.java | 12 ++++++------ .../gatk/walkers/genotyper/UnifiedGenotyper.java | 4 ++-- .../sting/utils/codecs/vcf/VCFConstants.java | 2 ++ 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculationModel.java index ba19638e0..78ab11eb1 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyExactAFCalculationModel.java @@ -681,8 +681,8 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula } // per-pool logging of AC and AF - gb.attribute(VCFConstants.MLE_ALLELE_COUNT_KEY, alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts); - gb.attribute(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, alleleFreqs.size() == 1 ? alleleFreqs.get(0) : alleleFreqs); + gb.attribute(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts); + gb.attribute(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, alleleFreqs.size() == 1 ? alleleFreqs.get(0) : alleleFreqs); // remove PLs if necessary if (newLikelihoods.length > MAX_LENGTH_FOR_POOL_PL_LOGGING) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java index 4d8c6808f..f62b2250e 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java @@ -46,33 +46,33 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest { @Test public void testBOTH_GGA_Pools() { - PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","d76e3b910259da819f1e1b2adc68ba8d"); + PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","0934f72865388999efec64bd9d4a9b93"); } @Test public void testINDEL_GGA_Pools() { - PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","ffadcdaee613dab975197bed0fc78da3"); + PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","126581c72d287722437274d41b6fed7b"); } @Test public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","96087fe9240e3656cc2a4e0ff0174d5b"); + PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","b543aa1c3efedb301e525c1d6c50ed8d"); } @Test public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","6fdae7093831ecfc82a06dd707d62fe9"); + PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","55b20557a836bb92688e68f12d7f5dc4"); } @Test public void testMT_SNP_DISCOVERY_sp4() { - PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","6b27634214530d379db70391a9cfc2d7"); + PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","7eb889e8e07182f4c3d64609591f9459"); } @Test public void testMT_SNP_GGA_sp10() { - PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "e74d4c73ece45d7fb676b99364df4f1a"); + PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "db8114877b99b14f7180fdcd24b040a7"); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index c1c1339f5..507806fbe 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -312,8 +312,8 @@ public class UnifiedGenotyper extends LocusWalker, Unif // add the pool values for each genotype if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) { - headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed, for this pool")); - headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed, for this pool")); + headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the alternate allele count, in the same order as listed, for each individual sample")); + headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the alternate allele fraction, in the same order as listed, for each individual sample")); } if (UAC.referenceSampleName != null) { headerInfo.add(new VCFInfoHeaderLine(VCFConstants.REFSAMPLE_DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Total reference sample depth")); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java index 8790a000d..dac58eb10 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java @@ -36,6 +36,8 @@ public final class VCFConstants { public static final String MLE_ALLELE_COUNT_KEY = "MLEAC"; public static final String ALLELE_FREQUENCY_KEY = "AF"; public static final String MLE_ALLELE_FREQUENCY_KEY = "MLEAF"; + public static final String MLE_PER_SAMPLE_ALLELE_COUNT_KEY = "MLPSAC"; + public static final String MLE_PER_SAMPLE_ALLELE_FRACTION_KEY = "MLPSAF"; public static final String ALLELE_NUMBER_KEY = "AN"; public static final String RMS_BASE_QUALITY_KEY = "BQ"; public static final String CIGAR_KEY = "CIGAR"; From def077c4e53b4b64949077a173d85dd43b9bc97e Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 9 Aug 2012 12:42:50 -0400 Subject: [PATCH 090/176] There's actually a subtle but important difference between foo++ and ++foo --- .../gatk/walkers/diagnostics/targets/FindCoveredIntervals.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java index 23e4d5ae0..373c8232e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java @@ -78,7 +78,7 @@ public class FindCoveredIntervals extends ActiveRegionWalker { public Long reduce(final GenomeLoc value, Long reduce) { if (value != null) { out.println(value.toString()); - return reduce++; + return ++reduce; } else return reduce; } From c7f22e410f5bfe6bbb3ea77187294787fdbc4939 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Thu, 9 Aug 2012 16:22:08 -0400 Subject: [PATCH 091/176] A few quick, minor findbugs fixes. --- .../sting/gatk/traversals/TraverseActiveRegions.java | 2 +- .../gatk/walkers/variantrecalibration/VariantDatum.java | 1 + public/java/src/org/broadinstitute/sting/utils/PairHMM.java | 2 +- .../org/broadinstitute/sting/utils/SWPairwiseAlignment.java | 6 +++--- .../sting/utils/activeregion/ActiveRegion.java | 4 ++-- .../sting/utils/activeregion/ActivityProfile.java | 3 --- 6 files changed, 8 insertions(+), 10 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 845c4eacf..979e0f2d6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -29,7 +29,7 @@ public class TraverseActiveRegions extends TraversalEngine workQueue = new LinkedList(); private final LinkedHashSet myReads = new LinkedHashSet(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java index 32350f0fa..a85129d78 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java @@ -52,6 +52,7 @@ public class VariantDatum implements Comparable { public int worstAnnotation; public MultivariateGaussian assignment; // used in K-means implementation + @Override public int compareTo( final VariantDatum other ) { return Double.compare(this.lod, other.lod); } diff --git a/public/java/src/org/broadinstitute/sting/utils/PairHMM.java b/public/java/src/org/broadinstitute/sting/utils/PairHMM.java index 9fcb97a4d..ec4cf3386 100644 --- a/public/java/src/org/broadinstitute/sting/utils/PairHMM.java +++ b/public/java/src/org/broadinstitute/sting/utils/PairHMM.java @@ -36,7 +36,7 @@ import java.util.*; */ public class PairHMM { - private static final int MAX_CACHED_QUAL = (int)Byte.MAX_VALUE; + private static final int MAX_CACHED_QUAL = (int)Byte.MAX_VALUE - 1; private static final byte DEFAULT_GOP = (byte) 45; private static final byte DEFAULT_GCP = (byte) 10; private static final double BANDING_TOLERANCE = 22.0; diff --git a/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java b/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java index 92d73a5ce..bc1158a4d 100755 --- a/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java +++ b/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java @@ -713,11 +713,11 @@ public class SWPairwiseAlignment { System.err.println("Only one "+argname+" argument is allowed"); System.exit(1); } - if ( l.get(0).equals("true") ) return new Boolean(true); - if ( l.get(0).equals("false") ) return new Boolean(false); + if ( l.get(0).equals("true") ) return Boolean.valueOf(true); + if ( l.get(0).equals("false") ) return Boolean.valueOf(false); System.err.println("Can not parse value provided for "+argname+" argument ("+l.get(0)+"); true/false are allowed"); System.exit(1); - return null; + return Boolean.valueOf(false); // This value isn't used because it is preceded by System.exit(1) } /* ############################################## diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java index 18276f932..8e660350f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java @@ -90,11 +90,11 @@ public class ActiveRegion implements HasGenomeLocation, Comparable public void removeAll( final ArrayList readsToRemove ) { reads.removeAll( readsToRemove ); } public boolean equalExceptReads(final ActiveRegion other) { - if ( ! activeRegionLoc.equals(other.activeRegionLoc) ) return false; + if ( activeRegionLoc.compareTo(other.activeRegionLoc) != 0 ) return false; if ( isActive != other.isActive ) return false; if ( genomeLocParser != other.genomeLocParser ) return false; if ( extension != other.extension ) return false; - if ( ! extendedLoc.equals(other.extendedLoc) ) return false; + if ( extendedLoc.compareTo(other.extendedLoc) != 0 ) return false; return true; } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java index 7e736b7bf..73f3cc487 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java @@ -46,7 +46,6 @@ public class ActivityProfile { final boolean presetRegions; GenomeLoc regionStartLoc = null; final List isActiveList; - private GenomeLoc lastLoc = null; private static final int FILTER_SIZE = 80; private static final double[] GaussianKernel; @@ -75,8 +74,6 @@ public class ActivityProfile { public void add(final GenomeLoc loc, final ActivityProfileResult result) { if ( loc.size() != 1 ) throw new ReviewedStingException("Bad add call to ActivityProfile: loc " + loc + " size != 1" ); - if ( lastLoc != null && loc.getStart() != lastLoc.getStop() + 1 ) - throw new ReviewedStingException("Bad add call to ActivityProfile: lastLoc added " + lastLoc + " and next is " + loc); isActiveList.add(result); if( regionStartLoc == null ) { regionStartLoc = loc; From f4c72a26d5683d24239b05be6d4047f3bff38390 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Thu, 9 Aug 2012 16:30:58 -0400 Subject: [PATCH 092/176] A few quick, minor findbugs fixes. --- public/java/src/org/broadinstitute/sting/utils/PairHMM.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/PairHMM.java b/public/java/src/org/broadinstitute/sting/utils/PairHMM.java index ec4cf3386..15f7a7869 100644 --- a/public/java/src/org/broadinstitute/sting/utils/PairHMM.java +++ b/public/java/src/org/broadinstitute/sting/utils/PairHMM.java @@ -36,7 +36,7 @@ import java.util.*; */ public class PairHMM { - private static final int MAX_CACHED_QUAL = (int)Byte.MAX_VALUE - 1; + private static final Byte MAX_CACHED_QUAL = Byte.MAX_VALUE; private static final byte DEFAULT_GOP = (byte) 45; private static final byte DEFAULT_GCP = (byte) 10; private static final double BANDING_TOLERANCE = 22.0; From 9a0dda71d482c29054f3a45e9a4c9eed8be1ce73 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 9 Aug 2012 09:00:44 -0400 Subject: [PATCH 093/176] BCF2 optimizations -- All low-level reads throw IOException instead of catching it directly. This allows us to not try/catch in readByte, improving performance by 5% or so -- Optimize encodeTypeDescriptor with final variables. Avoid using Math.min instead do inline comparison -- Inlined willOverflow directly in its single use --- .../sting/utils/codecs/bcf2/BCF2Codec.java | 38 +++++++------ .../sting/utils/codecs/bcf2/BCF2Decoder.java | 22 ++++---- .../bcf2/BCF2GenotypeFieldDecoders.java | 21 ++++---- .../codecs/bcf2/BCF2LazyGenotypesDecoder.java | 53 +++++++++++-------- .../sting/utils/codecs/bcf2/BCF2Utils.java | 21 ++------ .../variantcontext/writer/BCF2Encoder.java | 2 +- .../bcf2/BCF2EncoderDecoderUnitTest.java | 4 +- 7 files changed, 81 insertions(+), 80 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java index 3b9e86c8d..52a8ef0d0 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java @@ -113,18 +113,22 @@ public final class BCF2Codec implements FeatureCodec { @Override public VariantContext decode( final PositionalBufferedStream inputStream ) { - recordNo++; - final VariantContextBuilder builder = new VariantContextBuilder(); + try { + recordNo++; + final VariantContextBuilder builder = new VariantContextBuilder(); - final int sitesBlockSize = decoder.readBlockSize(inputStream); - final int genotypeBlockSize = decoder.readBlockSize(inputStream); - decoder.readNextBlock(sitesBlockSize, inputStream); - decodeSiteLoc(builder); - final SitesInfoForDecoding info = decodeSitesExtendedInfo(builder); + final int sitesBlockSize = decoder.readBlockSize(inputStream); + final int genotypeBlockSize = decoder.readBlockSize(inputStream); + decoder.readNextBlock(sitesBlockSize, inputStream); + decodeSiteLoc(builder); + final SitesInfoForDecoding info = decodeSitesExtendedInfo(builder); - decoder.readNextBlock(genotypeBlockSize, inputStream); - createLazyGenotypesDecoder(info, builder); - return builder.fullyDecoded(true).make(); + decoder.readNextBlock(genotypeBlockSize, inputStream); + createLazyGenotypesDecoder(info, builder); + return builder.fullyDecoded(true).make(); + } catch ( IOException e ) { + throw new UserException.CouldNotReadInputFile("Failed to read BCF file", e); + } } @Override @@ -234,7 +238,7 @@ public final class BCF2Codec implements FeatureCodec { * @return */ @Requires({"builder != null"}) - private final void decodeSiteLoc(final VariantContextBuilder builder) { + private final void decodeSiteLoc(final VariantContextBuilder builder) throws IOException { final int contigOffset = decoder.decodeInt(BCF2Type.INT32); final String contig = lookupContigName(contigOffset); builder.chr(contig); @@ -253,7 +257,7 @@ public final class BCF2Codec implements FeatureCodec { */ @Requires({"builder != null", "decoder != null"}) @Ensures({"result != null", "result.isValid()"}) - private final SitesInfoForDecoding decodeSitesExtendedInfo(final VariantContextBuilder builder) { + private final SitesInfoForDecoding decodeSitesExtendedInfo(final VariantContextBuilder builder) throws IOException { final Object qual = decoder.decodeSingleValue(BCF2Type.FLOAT); if ( qual != null ) { builder.log10PError(((Double)qual) / -10.0); @@ -309,7 +313,7 @@ public final class BCF2Codec implements FeatureCodec { * Decode the id field in this BCF2 file and store it in the builder * @param builder */ - private void decodeID( final VariantContextBuilder builder ) { + private void decodeID( final VariantContextBuilder builder ) throws IOException { final String id = (String)decoder.decodeTypedValue(); if ( id == null ) @@ -326,7 +330,7 @@ public final class BCF2Codec implements FeatureCodec { * @return the alleles */ @Requires("nAlleles > 0") - private List decodeAlleles( final VariantContextBuilder builder, final int pos, final int nAlleles ) { + private List decodeAlleles( final VariantContextBuilder builder, final int pos, final int nAlleles ) throws IOException { // TODO -- probably need inline decoder for efficiency here (no sense in going bytes -> string -> vector -> bytes List alleles = new ArrayList(nAlleles); String ref = null; @@ -356,7 +360,7 @@ public final class BCF2Codec implements FeatureCodec { * Decode the filter field of this BCF2 file and store the result in the builder * @param builder */ - private void decodeFilter( final VariantContextBuilder builder ) { + private void decodeFilter( final VariantContextBuilder builder ) throws IOException { final Object value = decoder.decodeTypedValue(); if ( value == null ) @@ -383,7 +387,7 @@ public final class BCF2Codec implements FeatureCodec { * @param numInfoFields */ @Requires("numInfoFields >= 0") - private void decodeInfo( final VariantContextBuilder builder, final int numInfoFields ) { + private void decodeInfo( final VariantContextBuilder builder, final int numInfoFields ) throws IOException { if ( numInfoFields == 0 ) // fast path, don't bother doing any work if there are no fields return; @@ -443,7 +447,7 @@ public final class BCF2Codec implements FeatureCodec { } @Ensures("result != null") - private final String getDictionaryString() { + private final String getDictionaryString() throws IOException { return getDictionaryString((Integer) decoder.decodeTypedValue()); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java index 2619a4dae..73137c794 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java @@ -129,18 +129,18 @@ public final class BCF2Decoder { // // ---------------------------------------------------------------------- - public final Object decodeTypedValue() { + public final Object decodeTypedValue() throws IOException { final byte typeDescriptor = readTypeDescriptor(); return decodeTypedValue(typeDescriptor); } - public final Object decodeTypedValue(final byte typeDescriptor) { + public final Object decodeTypedValue(final byte typeDescriptor) throws IOException { final int size = decodeNumberOfElements(typeDescriptor); return decodeTypedValue(typeDescriptor, size); } @Requires("size >= 0") - public final Object decodeTypedValue(final byte typeDescriptor, final int size) { + public final Object decodeTypedValue(final byte typeDescriptor, final int size) throws IOException { if ( size == 0 ) { // missing value => null in java return null; @@ -162,7 +162,7 @@ public final class BCF2Decoder { } } - public final Object decodeSingleValue(final BCF2Type type) { + public final Object decodeSingleValue(final BCF2Type type) throws IOException { // TODO -- decodeTypedValue should integrate this routine final int value = decodeInt(type); @@ -210,7 +210,7 @@ public final class BCF2Decoder { } @Ensures("result >= 0") - public final int decodeNumberOfElements(final byte typeDescriptor) { + public final int decodeNumberOfElements(final byte typeDescriptor) throws IOException { if ( BCF2Utils.sizeIsOverflow(typeDescriptor) ) // -1 ensures we explode immediately with a bad size if the result is missing return decodeInt(readTypeDescriptor(), -1); @@ -228,14 +228,14 @@ public final class BCF2Decoder { * @return */ @Requires("BCF2Utils.decodeSize(typeDescriptor) == 1") - public final int decodeInt(final byte typeDescriptor, final int missingValue) { + public final int decodeInt(final byte typeDescriptor, final int missingValue) throws IOException { final BCF2Type type = BCF2Utils.decodeType(typeDescriptor); final int i = decodeInt(type); return i == type.getMissingBytes() ? missingValue : i; } @Requires("type != null") - public final int decodeInt(final BCF2Type type) { + public final int decodeInt(final BCF2Type type) throws IOException { return BCF2Utils.readInt(type.getSizeInBytes(), recordStream); } @@ -258,7 +258,7 @@ public final class BCF2Decoder { * @return see description */ @Requires({"type != null", "type.isIntegerType()", "size >= 0"}) - public final int[] decodeIntArray(final int size, final BCF2Type type, int[] maybeDest) { + public final int[] decodeIntArray(final int size, final BCF2Type type, int[] maybeDest) throws IOException { if ( size == 0 ) { return null; } else { @@ -290,7 +290,7 @@ public final class BCF2Decoder { } } - public final int[] decodeIntArray(final byte typeDescriptor, final int size) { + public final int[] decodeIntArray(final byte typeDescriptor, final int size) throws IOException { final BCF2Type type = BCF2Utils.decodeType(typeDescriptor); return decodeIntArray(size, type, null); } @@ -311,7 +311,7 @@ public final class BCF2Decoder { * @param inputStream * @return */ - public final int readBlockSize(final InputStream inputStream) { + public final int readBlockSize(final InputStream inputStream) throws IOException { return BCF2Utils.readInt(4, inputStream); } @@ -345,7 +345,7 @@ public final class BCF2Decoder { } } - public final byte readTypeDescriptor() { + public final byte readTypeDescriptor() throws IOException { return BCF2Utils.readByte(recordStream); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java index 0dadc49f9..e4ae96262 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java @@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder; +import java.io.IOException; import java.util.*; /** @@ -105,12 +106,12 @@ public class BCF2GenotypeFieldDecoders { final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, - final GenotypeBuilder[] gbs); + final GenotypeBuilder[] gbs) throws IOException; } private class GTDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) { + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException { if ( ENABLE_FASTPATH_GT && siteAlleles.size() == 2 && numElements == 2 && gbs.length >= MIN_SAMPLES_FOR_FASTPATH_GENOTYPES ) fastBiallelicDiploidDecode(siteAlleles, decoder, typeDescriptor, gbs); else { @@ -135,7 +136,7 @@ public class BCF2GenotypeFieldDecoders { private final void fastBiallelicDiploidDecode(final List siteAlleles, final BCF2Decoder decoder, final byte typeDescriptor, - final GenotypeBuilder[] gbs) { + final GenotypeBuilder[] gbs) throws IOException { final BCF2Type type = BCF2Utils.decodeType(typeDescriptor); final int nPossibleGenotypes = 3 * 3; @@ -177,7 +178,7 @@ public class BCF2GenotypeFieldDecoders { final int ploidy, final BCF2Decoder decoder, final byte typeDescriptor, - final GenotypeBuilder[] gbs) { + final GenotypeBuilder[] gbs) throws IOException { final BCF2Type type = BCF2Utils.decodeType(typeDescriptor); // a single cache for the encoded genotypes, since we don't actually need this vector @@ -216,7 +217,7 @@ public class BCF2GenotypeFieldDecoders { private class DPDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) { + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException { for ( final GenotypeBuilder gb : gbs ) { // the -1 is for missing gb.DP(decoder.decodeInt(typeDescriptor, -1)); @@ -226,7 +227,7 @@ public class BCF2GenotypeFieldDecoders { private class GQDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) { + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException { for ( final GenotypeBuilder gb : gbs ) { // the -1 is for missing gb.GQ(decoder.decodeInt(typeDescriptor, -1)); @@ -236,7 +237,7 @@ public class BCF2GenotypeFieldDecoders { private class ADDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) { + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException { for ( final GenotypeBuilder gb : gbs ) { gb.AD(decoder.decodeIntArray(typeDescriptor, numElements)); } @@ -245,7 +246,7 @@ public class BCF2GenotypeFieldDecoders { private class PLDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) { + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException { for ( final GenotypeBuilder gb : gbs ) { gb.PL(decoder.decodeIntArray(typeDescriptor, numElements)); } @@ -254,7 +255,7 @@ public class BCF2GenotypeFieldDecoders { private class GenericDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) { + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException { for ( final GenotypeBuilder gb : gbs ) { Object value = decoder.decodeTypedValue(typeDescriptor, numElements); if ( value != null ) { // don't add missing values @@ -273,7 +274,7 @@ public class BCF2GenotypeFieldDecoders { private class FTDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) { + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException { for ( final GenotypeBuilder gb : gbs ) { Object value = decoder.decodeTypedValue(typeDescriptor, numElements); assert value == null || value instanceof String; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java index 35fb2e97a..cf34a8b48 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java @@ -26,9 +26,11 @@ package org.broadinstitute.sting.utils.codecs.bcf2; import com.google.java.contract.Requires; import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.*; +import java.io.IOException; import java.util.*; /** @@ -64,33 +66,38 @@ class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser { if ( logger.isDebugEnabled() ) logger.debug("Decoding BCF genotypes for " + nSamples + " samples with " + nFields + " fields each"); - // load our byte[] data into the decoder - final BCF2Decoder decoder = new BCF2Decoder(((BCF2Codec.LazyData)data).bytes); + try { - for ( int i = 0; i < nSamples; i++ ) - builders[i].reset(true); + // load our byte[] data into the decoder + final BCF2Decoder decoder = new BCF2Decoder(((BCF2Codec.LazyData)data).bytes); - for ( int i = 0; i < nFields; i++ ) { - // get the field name - final int offset = (Integer) decoder.decodeTypedValue(); - final String field = codec.getDictionaryString(offset); + for ( int i = 0; i < nSamples; i++ ) + builders[i].reset(true); - // the type of each element - final byte typeDescriptor = decoder.readTypeDescriptor(); - final int numElements = decoder.decodeNumberOfElements(typeDescriptor); - final BCF2GenotypeFieldDecoders.Decoder fieldDecoder = codec.getGenotypeFieldDecoder(field); - try { - fieldDecoder.decode(siteAlleles, field, decoder, typeDescriptor, numElements, builders); - } catch ( ClassCastException e ) { - throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field - + " inconsistent with the value observed in the decoded value"); + for ( int i = 0; i < nFields; i++ ) { + // get the field name + final int offset = (Integer) decoder.decodeTypedValue(); + final String field = codec.getDictionaryString(offset); + + // the type of each element + final byte typeDescriptor = decoder.readTypeDescriptor(); + final int numElements = decoder.decodeNumberOfElements(typeDescriptor); + final BCF2GenotypeFieldDecoders.Decoder fieldDecoder = codec.getGenotypeFieldDecoder(field); + try { + fieldDecoder.decode(siteAlleles, field, decoder, typeDescriptor, numElements, builders); + } catch ( ClassCastException e ) { + throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field + + " inconsistent with the value observed in the decoded value"); + } } + + final ArrayList genotypes = new ArrayList(nSamples); + for ( final GenotypeBuilder gb : builders ) + genotypes.add(gb.make()); + + return new LazyGenotypesContext.LazyData(genotypes, codec.getHeader().getSampleNamesInOrder(), codec.getHeader().getSampleNameToOffset()); + } catch ( IOException e ) { + throw new ReviewedStingException("Unexpected IOException parsing already read genotypes data block", e); } - - final ArrayList genotypes = new ArrayList(nSamples); - for ( final GenotypeBuilder gb : builders ) - genotypes.add(gb.make()); - - return new LazyGenotypesContext.LazyData(genotypes, codec.getHeader().getSampleNamesInOrder(), codec.getHeader().getSampleNameToOffset()); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java index c79abe2ae..3454d0c3c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java @@ -97,9 +97,8 @@ public final class BCF2Utils { @Requires({"nElements >= 0", "type != null"}) public static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) { - int encodeSize = Math.min(nElements, OVERFLOW_ELEMENT_MARKER); - byte typeByte = (byte)((0x0F & encodeSize) << 4 | (type.getID() & 0x0F)); - return typeByte; + final int encodeSize = nElements > MAX_INLINE_ELEMENTS ? OVERFLOW_ELEMENT_MARKER : nElements; + return (byte)((0x0F & encodeSize) << 4 | (type.getID() & 0x0F)); } @Ensures("result >= 0") @@ -121,18 +120,8 @@ public final class BCF2Utils { return decodeSize(typeDescriptor) == OVERFLOW_ELEMENT_MARKER; } - @Requires("nElements >= 0") - public static boolean willOverflow(final long nElements) { - return nElements > MAX_INLINE_ELEMENTS; - } - - public static byte readByte(final InputStream stream) { - // TODO -- shouldn't be capturing error here - try { - return (byte)(stream.read() & 0xFF); - } catch ( IOException e ) { - throw new ReviewedStingException("readByte failure", e); - } + public static byte readByte(final InputStream stream) throws IOException { + return (byte)(stream.read() & 0xFF); } /** @@ -295,7 +284,7 @@ public final class BCF2Utils { @Requires({"stream != null", "bytesForEachInt > 0"}) - public static int readInt(int bytesForEachInt, final InputStream stream) { + public static int readInt(int bytesForEachInt, final InputStream stream) throws IOException { switch ( bytesForEachInt ) { case 1: { return (byte)(readByte(stream)); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java index 2c1d99546..a2bbfb391 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java @@ -193,7 +193,7 @@ public final class BCF2Encoder { public final void encodeType(final int size, final BCF2Type type) throws IOException { final byte typeByte = BCF2Utils.encodeTypeDescriptor(size, type); encodeStream.write(typeByte); - if ( BCF2Utils.willOverflow(size) ) { + if ( size > BCF2Utils.MAX_INLINE_ELEMENTS ) { // write in the overflow size encodeTypedInt(size); } diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java index 7569ce90d..77050c069 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java @@ -537,11 +537,11 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest { return record; } - private final void decodeRecord(final List toEncode, final byte[] record) { + private final void decodeRecord(final List toEncode, final byte[] record) throws IOException { decodeRecord(toEncode, new BCF2Decoder(record)); } - private final void decodeRecord(final List toEncode, final BCF2Decoder decoder) { + private final void decodeRecord(final List toEncode, final BCF2Decoder decoder) throws IOException { for ( final BCF2TypedValue tv : toEncode ) { Assert.assertFalse(decoder.blockIsFullyDecoded()); final Object decoded = decoder.decodeTypedValue(); From c6bd9b15ff6639bc8a057e149667cd774873f042 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 9 Aug 2012 11:09:09 -0400 Subject: [PATCH 094/176] BCF2 optimizations -- BCF2Type enum has an overloaded method to read the type as an int from an input stream. This gets rid of a case statement and replaces it with just minimum tiny methods that should be better optimized. As side effect of this optimization is an overall cleaner code organization --- .../sting/utils/codecs/bcf2/BCF2Codec.java | 2 +- .../sting/utils/codecs/bcf2/BCF2Decoder.java | 4 +- .../sting/utils/codecs/bcf2/BCF2Type.java | 110 ++++++++++++++++-- .../sting/utils/codecs/bcf2/BCF2Utils.java | 20 ---- 4 files changed, 103 insertions(+), 33 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java index 52a8ef0d0..570ca7c1c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java @@ -151,7 +151,7 @@ public final class BCF2Codec implements FeatureCodec { logger.info("BCF version " + bcfVersion); - final int headerSizeInBytes = BCF2Utils.readInt(BCF2Type.INT32.getSizeInBytes(), inputStream); + final int headerSizeInBytes = BCF2Type.INT32.read(inputStream); if ( headerSizeInBytes <= 0 || headerSizeInBytes > MAX_HEADER_SIZE) // no bigger than 8 MB error("BCF2 header has invalid length: " + headerSizeInBytes + " must be >= 0 and < "+ MAX_HEADER_SIZE); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java index 73137c794..8000f2e73 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java @@ -236,7 +236,7 @@ public final class BCF2Decoder { @Requires("type != null") public final int decodeInt(final BCF2Type type) throws IOException { - return BCF2Utils.readInt(type.getSizeInBytes(), recordStream); + return type.read(recordStream); } /** @@ -312,7 +312,7 @@ public final class BCF2Decoder { * @return */ public final int readBlockSize(final InputStream inputStream) throws IOException { - return BCF2Utils.readInt(4, inputStream); + return BCF2Type.INT32.read(inputStream); } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java index 49f375b25..cb9b0af84 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java @@ -26,6 +26,9 @@ package org.broadinstitute.sting.utils.codecs.bcf2; import com.google.java.contract.Requires; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.util.EnumSet; /** @@ -35,12 +38,82 @@ import java.util.EnumSet; * @since 05/12 */ public enum BCF2Type { - MISSING(0, 0, 0x00), - INT8 (1, 1, 0xFFFFFF80, -127, 127), // todo -- confirm range - INT16(2, 2, 0xFFFF8000, -32767, 32767), - INT32(3, 4, 0x80000000, -2147483647, 2147483647), - FLOAT(5, 4, 0x7F800001), - CHAR (7, 1, 0x00000000); + // the actual values themselves + MISSING(0, 0, 0x00) { + @Override public int read(final InputStream in) throws IOException { + throw new IllegalArgumentException("Cannot read MISSING type"); + } + @Override public void write(final int value, final OutputStream out) throws IOException { + throw new IllegalArgumentException("Cannot write MISSING type"); + } + }, + + // todo -- confirm range + INT8 (1, 1, 0xFFFFFF80, -127, 127) { + @Override + public int read(final InputStream in) throws IOException { + return BCF2Utils.readByte(in); + } + + @Override + public void write(final int value, final OutputStream out) throws IOException { + //To change body of implemented methods use File | Settings | File Templates. + } + }, + + INT16(2, 2, 0xFFFF8000, -32767, 32767) { + @Override + public int read(final InputStream in) throws IOException { + final int b2 = BCF2Utils.readByte(in) & 0xFF; + final int b1 = BCF2Utils.readByte(in) & 0xFF; + return (short)((b1 << 8) | b2); + } + + @Override + public void write(final int value, final OutputStream out) throws IOException { + //To change body of implemented methods use File | Settings | File Templates. + } + }, + + INT32(3, 4, 0x80000000, -2147483647, 2147483647) { + @Override + public int read(final InputStream in) throws IOException { + final int b4 = BCF2Utils.readByte(in) & 0xFF; + final int b3 = BCF2Utils.readByte(in) & 0xFF; + final int b2 = BCF2Utils.readByte(in) & 0xFF; + final int b1 = BCF2Utils.readByte(in) & 0xFF; + return (int)(b1 << 24 | b2 << 16 | b3 << 8 | b4); + } + + @Override + public void write(final int value, final OutputStream out) throws IOException { + //To change body of implemented methods use File | Settings | File Templates. + } + }, + + FLOAT(5, 4, 0x7F800001) { + @Override + public int read(final InputStream in) throws IOException { + return INT32.read(in); + } + + @Override + public void write(final int value, final OutputStream out) throws IOException { + //To change body of implemented methods use File | Settings | File Templates. + } + }, + + CHAR (7, 1, 0x00000000) { + @Override + public int read(final InputStream in) throws IOException { + return INT8.read(in); + } + + @Override + public void write(final int value, final OutputStream out) throws IOException { + //To change body of implemented methods use File | Settings | File Templates. + } + }; private final int id; private final Object missingJavaValue; @@ -48,10 +121,6 @@ public enum BCF2Type { private final int sizeInBytes; private final long minValue, maxValue; - BCF2Type(final int id) { - this(id, -1, 0, 0, 0); - } - BCF2Type(final int id, final int sizeInBytes, final int missingBytes) { this(id, sizeInBytes, missingBytes, 0, 0); } @@ -121,4 +190,25 @@ public enum BCF2Type { public boolean isIntegerType() { return INTEGERS.contains(this); } + + /** + * Read a value from in stream of this BCF2 type as an int [32 bit] collection of bits + * + * For intX and char values this is just the int / byte value of the underlying data represented as a 32 bit int + * For a char the result must be converted to a char by (char)(byte)(0x0F & value) + * For doubles it's necessary to convert subsequently this value to a double via Double.bitsToDouble() + * + * @param in + * @return + * @throws IOException + */ + @Requires("in != null") + public int read(final InputStream in) throws IOException { + throw new IllegalArgumentException("Not implemented"); + } + + @Requires("out != null") + public void write(final int value, final OutputStream out) throws IOException { + throw new IllegalArgumentException("Not implemented"); + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java index 3454d0c3c..632977a7f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java @@ -282,26 +282,6 @@ public final class BCF2Utils { else return Collections.singletonList(o); } - - @Requires({"stream != null", "bytesForEachInt > 0"}) - public static int readInt(int bytesForEachInt, final InputStream stream) throws IOException { - switch ( bytesForEachInt ) { - case 1: { - return (byte)(readByte(stream)); - } case 2: { - final int b2 = readByte(stream) & 0xFF; - final int b1 = readByte(stream) & 0xFF; - return (short)((b1 << 8) | b2); - } case 4: { - final int b4 = readByte(stream) & 0xFF; - final int b3 = readByte(stream) & 0xFF; - final int b2 = readByte(stream) & 0xFF; - final int b1 = readByte(stream) & 0xFF; - return (int)(b1 << 24 | b2 << 16 | b3 << 8 | b4); - } default: throw new ReviewedStingException("Unexpected size during decoding"); - } - } - public static void encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException { switch ( type.getSizeInBytes() ) { case 1: From 06258c8a0154db2ac3fca6e20fe7036e27794485 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 9 Aug 2012 15:34:07 -0400 Subject: [PATCH 095/176] BCF2 optimizations -- Added Write method to BCF2 types that directly converts int value to byte stream. Deleted writeRawBytes(int) -- encodeTypeDescriptor semi-inlined into encodeType so that the tests for overflow are done in just one place -- Faster implementation of determineIntegerType for int[] values --- .../sting/utils/codecs/bcf2/BCF2Decoder.java | 2 +- .../sting/utils/codecs/bcf2/BCF2Type.java | 15 ++++-- .../sting/utils/codecs/bcf2/BCF2Utils.java | 54 +++++-------------- .../variantcontext/writer/BCF2Encoder.java | 13 +++-- .../variantcontext/writer/BCF2Writer.java | 6 +-- 5 files changed, 35 insertions(+), 55 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java index 8000f2e73..d7f59632c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java @@ -295,7 +295,7 @@ public final class BCF2Decoder { return decodeIntArray(size, type, null); } - public final double rawFloatToFloat(final int rawFloat) { + private double rawFloatToFloat(final int rawFloat) { return (double)Float.intBitsToFloat(rawFloat); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java index cb9b0af84..6fd698ff6 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java @@ -57,7 +57,7 @@ public enum BCF2Type { @Override public void write(final int value, final OutputStream out) throws IOException { - //To change body of implemented methods use File | Settings | File Templates. + out.write(0xFF & value); // TODO -- do we need this operation? } }, @@ -71,7 +71,9 @@ public enum BCF2Type { @Override public void write(final int value, final OutputStream out) throws IOException { - //To change body of implemented methods use File | Settings | File Templates. + // TODO -- optimization -- should we put this in a local buffer? + out.write((0x00FF & value)); + out.write((0xFF00 & value) >> 8); } }, @@ -87,7 +89,10 @@ public enum BCF2Type { @Override public void write(final int value, final OutputStream out) throws IOException { - //To change body of implemented methods use File | Settings | File Templates. + out.write((0x000000FF & value)); + out.write((0x0000FF00 & value) >> 8); + out.write((0x00FF0000 & value) >> 16); + out.write((0xFF000000 & value) >> 24); } }, @@ -99,7 +104,7 @@ public enum BCF2Type { @Override public void write(final int value, final OutputStream out) throws IOException { - //To change body of implemented methods use File | Settings | File Templates. + INT32.write(value, out); } }, @@ -111,7 +116,7 @@ public enum BCF2Type { @Override public void write(final int value, final OutputStream out) throws IOException { - //To change body of implemented methods use File | Settings | File Templates. + INT8.write(value, out); } }; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java index 632977a7f..e6e78d89d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java @@ -95,10 +95,9 @@ public final class BCF2Utils { return dict; } - @Requires({"nElements >= 0", "type != null"}) + @Requires({"nElements >= 0", "nElements <= OVERFLOW_ELEMENT_MARKER", "type != null"}) public static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) { - final int encodeSize = nElements > MAX_INLINE_ELEMENTS ? OVERFLOW_ELEMENT_MARKER : nElements; - return (byte)((0x0F & encodeSize) << 4 | (type.getID() & 0x0F)); + return (byte)((0x0F & nElements) << 4 | (type.getID() & 0x0F)); } @Ensures("result >= 0") @@ -216,18 +215,18 @@ public final class BCF2Utils { @Ensures("result.isIntegerType()") public static BCF2Type determineIntegerType(final int[] values) { - // literally a copy of the code below, but there's no general way to unify lists and arrays in java - BCF2Type maxType = BCF2Type.INT8; - for ( final int value : values ) { - final BCF2Type type1 = determineIntegerType(value); - switch ( type1 ) { - case INT8: break; - case INT16: maxType = BCF2Type.INT16; break; - case INT32: return BCF2Type.INT32; // fast path for largest possible value - default: throw new ReviewedStingException("Unexpected integer type " + type1 ); - } + // find the min and max values in the array + int max = 0, min = 0; + for ( final int v : values ) { + if ( v > max ) max = v; + if ( v < min ) min = v; } - return maxType; + + final BCF2Type maxType = determineIntegerType(max); + final BCF2Type minType = determineIntegerType(min); + + // INT8 < INT16 < INT32 so this returns the larger of the two + return maxType.compareTo(minType) >= 0 ? maxType : minType; } /** @@ -281,31 +280,4 @@ public final class BCF2Utils { else if ( o instanceof List ) return (List)o; else return Collections.singletonList(o); } - - public static void encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException { - switch ( type.getSizeInBytes() ) { - case 1: - encodeStream.write(0xFF & value); - break; - case 2: - encodeStream.write((0x00FF & value)); - encodeStream.write((0xFF00 & value) >> 8); - break; - case 4: - encodeStream.write((0x000000FF & value)); - encodeStream.write((0x0000FF00 & value) >> 8); - encodeStream.write((0x00FF0000 & value) >> 16); - encodeStream.write((0xFF000000 & value) >> 24); - break; - default: - throw new ReviewedStingException("BUG: unexpected type size " + type); - } -// general case for reference -// for ( int i = type.getSizeInBytes() - 1; i >= 0; i-- ) { -// final int shift = i * 8; -// int mask = 0xFF << shift; -// int byteValue = (mask & value) >> shift; -// encodeStream.write(byteValue); -// } - } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java index a2bbfb391..01dac7eb6 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java @@ -191,9 +191,12 @@ public final class BCF2Encoder { @Requires("size >= 0") @Ensures("encodeStream.size() > old(encodeStream.size())") public final void encodeType(final int size, final BCF2Type type) throws IOException { - final byte typeByte = BCF2Utils.encodeTypeDescriptor(size, type); - encodeStream.write(typeByte); - if ( size > BCF2Utils.MAX_INLINE_ELEMENTS ) { + if ( size <= BCF2Utils.MAX_INLINE_ELEMENTS ) { + final int typeByte = BCF2Utils.encodeTypeDescriptor(size, type); + encodeStream.write(typeByte); + } else { + final int typeByte = BCF2Utils.encodeTypeDescriptor(BCF2Utils.OVERFLOW_ELEMENT_MARKER, type); + encodeStream.write(typeByte); // write in the overflow size encodeTypedInt(size); } @@ -201,12 +204,12 @@ public final class BCF2Encoder { @Ensures("encodeStream.size() > old(encodeStream.size())") public final void encodeRawInt(final int value, final BCF2Type type) throws IOException { - BCF2Utils.encodeRawBytes(value, type, encodeStream); + type.write(value, encodeStream); } @Ensures("encodeStream.size() > old(encodeStream.size())") public final void encodeRawBytes(final int value, final BCF2Type type) throws IOException { - BCF2Utils.encodeRawBytes(value, type, encodeStream); + type.write(value, encodeStream); } // -------------------------------------------------------------------------------- diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java index 32377d09e..a080c4e62 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java @@ -158,7 +158,7 @@ class BCF2Writer extends IndexingVariantContextWriter { final byte[] headerBytes = capture.toByteArray(); new BCFVersion(MAJOR_VERSION, MINOR_VERSION).write(outputStream); - BCF2Utils.encodeRawBytes(headerBytes.length, BCF2Type.INT32, outputStream); + BCF2Type.INT32.write(headerBytes.length, outputStream); outputStream.write(headerBytes); } catch (IOException e) { throw new UserException.CouldNotCreateOutputFile("BCF2 stream", "Got IOException while trying to write BCF2 header", e); @@ -359,8 +359,8 @@ class BCF2Writer extends IndexingVariantContextWriter { */ @Requires({"infoBlock.length > 0", "genotypesBlock.length >= 0"}) private void writeBlock(final byte[] infoBlock, final byte[] genotypesBlock) throws IOException { - BCF2Utils.encodeRawBytes(infoBlock.length, BCF2Type.INT32, outputStream); - BCF2Utils.encodeRawBytes(genotypesBlock.length, BCF2Type.INT32, outputStream); + BCF2Type.INT32.write(infoBlock.length, outputStream); + BCF2Type.INT32.write(genotypesBlock.length, outputStream); outputStream.write(infoBlock); outputStream.write(genotypesBlock); } From 33625840149633355929c53c36cf9af720c25a7f Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 9 Aug 2012 15:34:20 -0400 Subject: [PATCH 096/176] Updating cofoja to the latest version --- ivy.xml | 2 +- ...a-1.0-20110609.jar => cofoja-1.0-r139.jar} | Bin 377018 -> 378205 bytes ...a-1.0-20110609.xml => cofoja-1.0-r139.xml} | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename settings/repository/com.google.code.cofoja/{cofoja-1.0-20110609.jar => cofoja-1.0-r139.jar} (73%) rename settings/repository/com.google.code.cofoja/{cofoja-1.0-20110609.xml => cofoja-1.0-r139.xml} (57%) diff --git a/ivy.xml b/ivy.xml index 6dd5be7a8..0761cb411 100644 --- a/ivy.xml +++ b/ivy.xml @@ -87,7 +87,7 @@ - + diff --git a/settings/repository/com.google.code.cofoja/cofoja-1.0-20110609.jar b/settings/repository/com.google.code.cofoja/cofoja-1.0-r139.jar similarity index 73% rename from settings/repository/com.google.code.cofoja/cofoja-1.0-20110609.jar rename to settings/repository/com.google.code.cofoja/cofoja-1.0-r139.jar index 2197e721faa7b98dbc7ed1a4bfa88b7d7aedb8b9..2cbdd380d89da4778dc0e1e3144b9e91975a711e 100644 GIT binary patch delta 79023 zcmZ5{19W6vvvzFTnb@{%+qP|VCU(cRZQIF&6Wiv*m`sv?-tWElU+cSPtml_r1!vQ#|7DXGMQcjC3JMNNj*Y zOd}i^3ku}B@Fz(rmX2%cmdB|E7N|&r(IFmLdYpN&Gr&oQ`Q_vWc<^%GBobyxx)hhVddn=-aQo&r&NfwUZR5Kb^zj6JDbW%*)|nT znhZ-JMo&=(!SshTbg0(UIzSyPb_5I2_rR|xRa5AH2Mh!9&p;D@ML_&B@MK?5qJNDX zP>O$zQBYdce_a1O+y5T8$%)_u$pm0<5dU1 zEDcEjsMT;#!`HwFbVO@1(UA{pQ`WN)j*;Wch+avDQi&{>C)Ws(h;~Z9!b@}XJiIAG z>Yu28mOGf3Se<=!`UC3FYvpAo3KKhF-PG9nsAswQ(x>ujCE9MC3?-zqGl*P$O zE}SN^HfZuq=(-2C-S*f4%chfu-@|ZD4$0D@3PYU3E7aH?<(W?0io>k;gO=Nk0qLuN z4}TWZ6@fYDfW<~GgH;#s7r(EFVz+^qF6NTUw886dQ4S`w3&3g&?;?{|%;5*`;ai-t zrP7}=n_RX2>yLkesX2$eXYALuoVte`?_y^!bgt|mRhkA`0efc}I|v9;Er&JIg9`qo z#;`i!nfu#uZ8qoOa=fN`EJQBVGXaf&L>*;kVm`17(X!diyh1B-|5{^gWb!MTSEyau z*Z2tqa}0kVaipI5K;z#rGAq`LXQ$1x+Z&1GPCG^%Awu%HKn0RRBf>(%$rI3e!^D_$ z8D)B`9!secvCzO}nVS!*{2Wq3c>7w@3ttx;*{_uw*0|W`7Jw6jKG^vzRCxh}oXg#7 zPBnOh9|M+)DqzFZ62-g_Xy%`aMg>?D7B31MzF}~llc;c0b2P6v#X2i9R+Ej1dOznH z`X0Iu#VlEd=kWU8NXZr(gO7hJX1q>ivhHVA@udm7J)5x46lgfAx8IO#-QMEb+S5+O zxiR!_&!MAH;e&ejIeL>+BcBG?WL+p;TA%GicWL91!1*iqmM`XNG0mpgAARP2yWj5$ zoIT=?>>_+Nkt8&@AL;36J{1;%jg8MQcsRJGH9bI{(5Zz_yC!|+{z;k}CT%6<&v;-u zW96pw&8B~Eh2@~zfV^ysSK9Ua0JEv6U(SOg@&_T7 z+RJ+OK_8}wMRuC}BC9)Sx=*GDqb(*CL|?_O15v`8V9Z0t65 zVa~c4%v(0LR{A>CtUugzY~#F0pZI>f7nH!1EyiZrPKP;h zZb(=}ZeCPoH->VOG7TnE5(TnJhFW9xW?O0%c6-9Bt^|g(zt2xhy1io@W;qmaCw|&R z_L}U6-L#hckjZb=G~5f{UOmkx(+y~|H*c(=kfH|U6A3{Sj)xM}uW66s4f0z*dK+;% z)R=`R+db4By4^%fM{PS0$?Uh4x>ZtZcz;Lmb|;0vhB(#g;`rpgc*)Fg&Y_gJonvs} zmz1y@yMEF>LVf;9nSK4bgpf13SBX)sAs>l!(7xDyViBgT$1QEEf~!)j+^ku#hVhL; zMLePeF@v~5OvD4VnKpGqu&3$YQT7jU;S=xT98ec5HV1j&DZ7LG@9FJ2<&h-wH_*q) zKascqrAY>IU?LddyYn;K_SeJo@1b3=LQP?gn1dyJF$E|^)jtSV;yk3rDnB*1sWBeG zf2+(-w}qwz&+F&#xXrrF;u_Ark5AzTnO&8cnEV)V#e=P}PHt_e`jr#Kfqn^5nMM4& zN%1Fj%gq_M2N<63Y$-b6O;t}KGs4P~C!hB$U7^8>9vK(d2q*eP9ATvWfX=3NGmr#{4V9e}}nQ7)9mZ%9G&{9K^_%=_B7TY?9KFGXk^j3WD*Z`jZvkA%V0uaZE{5r?zx8% zGI7@vC=C`V4U*lxUf8#_l6Zy1@V&gUKY2cu%dZ2J$U=qb%W{T$t{Hc`H@kgbgq-@p z_X7V^BPog04x`4dl0?AX&5OD-M5` z*G0HVI;bIB6l&_BS;)q8;;lkn8w&j558qJm?oi`_;nK(BBX8q8f~ca+%4@MoEBlcC zvi^dEq{d9qb8^9tYy|m;`zZyy!E-MDf}zxX19zh+rrtc_6-N8s9H9_>BX<_@_0ImO zCV(;4cX|&|t_Oo~zy&v1+l?(}$bwx~7DGFGgvm=s;Yl8+`}nn%0tXW{(an^OT2zn4 zF(QMDMsFFVYufpS?QCgOEyY#v`GuNpksGE}5{a`Um&Xgem_vy}#^9=jf;EE2h&^v| zn!xC-dWljryE$lOWZHpvCG%{(mXxo- zJ`#1ici~tkwFf0O*29;*@KIhd8>^>T#ZY^Pm`Xs zbHNi|M>0Ecnr@2)?Y#NQe+aK%*0858j!NCC3dUU-kSyE|E4LvJL%n%b;K*BNkU{!e zgT*Brp(-0>{$s zR;*tu09uIFG?4aZnEjV_bK7;bW{3>7G-S)bcLLg*L+#DB?~uIHTwYFR2!( z9u&TaVpG-ON9SeiD%+TNCyfJle2{Xly}?2-fQX(DJ-E+EA=p8HC3kPGvGOhUpH!1q zNAH-|2}Q9||COtis<|lPXRdImHc|n;+T%wV0jkUNl*BCMasd*6mNWjBv9~i_k}td2 zhTo88DE#Y?_dS6x4aoL#7Ur%O^ef2*Z8W%B}h`$9~l z4Bh;Ga8zX$6a0#_*UaNOGrn*y*IHt900igYR^ES8afMzwx0xXi_T~uY&@ z^#K@}Gmxl^wC^$Nv1qNrswz{wsGDvQ zje7nJ50xyTdsae5_@nDZ^jt8GcO%Rkm{acSrtME?WAbRp&#a7l>_Zy4+)h=L0=W8} z(2@2nd>=R#6M0BCaNfE~tfPWv6nZX%GdK%Q z#Ov5v>NXoASLR!<%>7DCm+^TDhxnZ(i9;KO?Q8A=*v0E$g@jihJ^n zb7@d()22?0^Bl4}c_5 zZ0@ubpK-d*ct*e-2;dBvH@E_#ha5~Cqp?&7E6{hNCVzZZ;>y%*-c$Gb1|}4|9TmuN zl_@ohNIHlWEi}#(k0C`Rg+1=W9M9er)dmGb z(?a%@sbUsK&YV{=Bku|&D{h2jpE=BvZxLX;wMhFZzo*OVZ$j)qKnVNpD06r-+GlYM z+CW0`!IJfEU5^V~tchX=Jk3`D5IB%nkr<5&I?Ry-g_}jp1z?a-D}iGn^PVQ{fl4-% zMCi1K5^;eG&7x*6x^Zz9aV4Z`g|k#$I!d|6+uUldCHXTxiYU+6*-uUfYYGHRzGRC; zM&i}vc0h)MQ06g*#w0BGcP!^5c}^;D`GU=`4MH#Hq-g;36TvWGidl+$I+vYpxJR67 zDcVkRaKlVVmJSpaG}eAy{4*wA@(@Gf zny`;z1ct$fduJaQ@uBLx#s&178d~MP7Q@u6LH+0{zXV;N$nmTysW!TZUQVi$w}*|I ze+9cqmOa4a<#c66ANGqX`c7Sxa#}7k2(LY|c{=n3{@(-x{Hqn!01O1g4FUv2`)~CY zN$*#J!~>M7T`HoBV0@OfY^$clK#B{8O;#PNYTYTOLS&=aRm@|lJVN8PB{y~CR(3C5 zG{1s0T-!^%^-5Mle8 z9GDW9F*|(A4gD?$I~}?J*FFVRhnHa%Ddb#QoesdQna;YmYunhtj1OdLb0Au}PWs7L z)wK^*ry1zz8A6h!r7_5C=qP)=BJpasiB6|VJJVWiKET|3Qqg6gR&@gFm)_oSOm zBn{BE2S~V`Iz(};@L<{)?~H&oQ9?0K7(0)niWuW|SPqSOpmQ}?@0CTVxyi8Q1G?}f6)>D|B(o9Eg3kJo^tv4roEZYKO}lXE-qD25OYXb{ zA94$e9>=8=rro;LA z$njfPHo`r=*qH!w)m}eoOPlM$dOI{q(mWd=DJzS_Z~AkYB+9yP;1B<2vCQZBqiX6e z$29!QF~$Fq+~gKkNq~bLx(G@j<;hLfQB}a*bS7)U>bQe^d^3IN%l_Adx+-oSrj%JWn!z3HAAd--3AAb;Qzn(C^zK zBGqv?6Xu2BduXpYJ1-704Ed(JC0fjd1=fyjv+&K6QgjH=NC5CC>8Hdc8;D0)SNuXx zJdH|K3M9vaFWXHq8tl-*)5=)y)ZmQ=83gS(klooCKba*=1njdJ1^`m`$3tFVpW} zlT{k(7qumG0f0M#T(8zs46m_=YF_AUM#o{V`JhWvcrOBWgBp`uW^HX$tx6be@)D$> z%wINtW|`A+vpIS>q#}dMB-Y;hr$c8Ti$0*dPeGZKI#lMRTelF0wcaC`kn#k0&cYxr zP-*$8UIE!*qa$n(g&a-=pI1J9FuSq8(4ygn#|tx*xF2Mdfk3uqyi}pOgmxH zr*(P2EVMbOiWAmjG7Q!z@%MiD4~4}oWA^O!{?ZV*WUJyN+AuO9^nIqeLbl*!^RukS zx67=++sglM(7}_K!1wzLI1kBifA#M;Q{mT+036BTEv4?x43@_?KqZ7?cP8HQ@ZBl8IMH6KJVt%t)2zn@2?-O zlm9%u`~rY=Ahr|BdE*TF(7%{L7)4o71e0ox-Xv!aHmBl&s5XPYg-L-C*Hqf65h0s# z#ym$rN8_X>;&z0>NuQ`%jsvYp2Ox%MB5*d2ryVRO2oI z(B&Y=T{L(kw6#oio(c}RyIb$j;7|y^lMm5cgfe2LpYdT7OrhLNLP+0uTXJP2Fq8LR zSsgR#iL~07<b$kr$=f*EuFe#?AD`r(Jf8-{o^t+P^qBE0 zL7GgGzku9kqS#;=UGkoPrUbb)qzJk+@&2`gsw+hxf`8#6TniuZH^}OC8e@e$92}67 z2z-9|8HrNyMt7EXi9oDbTx9+vuAQ3O2Mo}C1DJ3kew&>cTWkm5tnfKK&){s~QqY)a zg@4b8g}wUG5fsD9NJ4316dn~Fs}lR;FLFFCO*EhspHsfV5(kP7D6#KRok=8_90rlB z$V7!zncSQq4IiWT#<1oMB@VKX9XjZzqKrO>*};`QA%NR*o~8GBDLQOJ)Vs^ zKh)?u#T*nDV4nemaOKihXT`!RSIeIjcc?zdHc8k|u$t)frs&rF zI>(7jqO#en&ZViS&KxMvj$pP+T%+&Mhw)o_WPr|EZu)(W9o-1nsW+9_!x#%lNbd99 z?)BgXRlE!%_4Rh|w6Tl%2@j8Rl%WNCWY@vLNqOX)zDWR>bD1JooSniC9{pk`Q-tf` zmh1r!h=hWAEAn?~EY*5vMvvlfVcV*3W7-e=-?lKN3mnqt=`g@YVv>9Gy0WikM=dyMC7jw~<1b^P05nR99x9PUVYO7!7^4+L`9yV_ zbV}ek6%4?&f`E(1(dGBcb6@?{VaMP-Ir1CeFrz5o!Cwque1Dmy?QR~7Kx9!NbendJA{L}U?q#N$$4 zN%wRzXg7tDTjdqxLcgGBZAR`kW4V3mJ&<@x{RbKTzrm?lil^>>G>|u5Waxi&5d=~D z|4~T%#ryx2b<6heJp}&Ns~#v25dOc+i7I_h6%q%KtA4Kfw{Crghtk7d4pK<+2aCfd zbmV#Ofr=*>gUF0LM6}wEb#zUSZOa_h1NZeREW%W){Xz17G+q7JM@|Mgc66AtSK9q} z`t#@G8agP(8%>hHiWFyLop~%#*_NEHXXV=IO( zL-Pnw6>f+6xZf3yk7tPkmDZtlumhJ~zcr*kNH254a)wLNk?twKdnzb)R>olOlWjtu z=+RrW@7Q;e=0DDbxVXz|YYMdAO{?Q#yNe{r@PyAH$)ba|adPo!Iltv0(YKM^#58xI zx9ilCocD-9;xf#}Tx`%M5m-!;jPGaIBzghRblgOrr~FVIsTz@V8Wmr6!SSV=)YxJ0 z!y=5MYU1!1$B-an641l2(&eG80{5}eb<&((5KAtacW$<+%Htc>ByA?vtIaueAZV*p z+g55;#&bOHh~wi8cTpeCiD$pL&>kR!Qy3?`IoB2qHz8Lnjb4?*)QUftQ5{Wpr#}Sr z#nWrzgnuEL_Sy46-q1i-^RCgqSEV$sAAxsfwn{*O!O_O3TRDhSR zTtO)wI5ZUZs5F*;&5;iOKzjX6=lr_Mj+@lYM<=(*k`~o3f^Q@eNb+t>@Yr#a6;SGzC8Qz&tR&v!)V^nUWO^wOkZj^^<<`~<{&r)Z|GQj)el3o*(iI0|H{P0=J<8!Z<+JL0umF7ieg+>P^Typ8 z634>bn%zUYS+ih`ph?k}x7shbI>?K8N9a_hZA^3DAdSWy)Aj2CO!baT{IvVAwuH-y z&#Jsdo@m_Rci`RV6rm4kC|6z3Cl5PYq`pAU#$VUsj^W+aOXdHd*djpW)gw7^=q)$4 za8?o6r0Wqwvi-AL-S){SdG2g<2y+$P!dz;t#*v1E+tV4df>Do;eaf0lR*XEMZTOWlA-rcQo)G=!hF;)Eqy zmq_R6K4UZ|=8b>MC3*i%7{y3x=%wvg3Z5Zk06unvNey!`@8_HlS(V3x)K()d(D6OQbW1#iUy!6cUd&>VR_uX={) zn@09~aG_FzjgnAL5M2v4^)YS0s%Jv9i@+EntF%muZY-7aIV5fCoU{_g*lnO=Uj`!* zV6-8`YSxQ;v$e?0+Zp7=tX{YZT0jH5+17XQDepl~f$DgIx++6bLo z$4Z;ds1ErJ5T~*0wkeMC$=F7=sS?PpfG5AWVqos{tHj`Q~T|gt*kHp;e|tF#U|DsChNL`_oMV;HP#x0VoL(E z&UcZ%jM|&wYRhnSYPkmALUwqj8b`oF@)$b|HQc6v9W;F>7BjbY8*tcqj-}>rY0kYm zy~82^6>qFgZpKVnmeBMqI8f=D6+)Bcjuxg({!{Kc=Fnr_&=1u?cRSr>k7~}xfRKcF#VbO7^(z=YE$3Y+n3x1^Su`9c z4r9w51Pgt);S9%WdNoG&0+nF^Uv;5b~`Al%1#61TfOPMAPb2TmPe)PbSroHME3;u1UyZX%{Ah+qk zJD$)`rkakKIxWB6A;u}IRg@sCDyO7a9gXnBE2}ZVr^C`4h&!fai+$a7R>^@wX}GR} zQ z)I!@Acr;&|gSi&|oLyuS-DLWi>>rr`geArf&?haY(Wk6*Z;VIz_^y9K75dCV6>$o~ z_9w{BRWU{y*|4ZRVSD^-w}2n8Cgx7T)*FLUdAM%ys2~)9H>8OE3;&Ta7H~kB%Nq4K zNb{la+CmaEW)ACFA(~5Dl?J}-^%xMd*nw$Ghxjr_y&l>W^Zi#L7gb0%&Tq(aM{%iQ z&|8ym;>5Iv`7@IHOl{IO;3AqMsiZghKINWOc=Q^no|F}b;dF0}O$m~JM}?hE9hcYol={-eMIZva^6Ic~JrEVkS*l2=}hz@CS4Zu^yWyO)v~$Yevj^d_56 zwGqSyE^zI>_rLT$1^C_$^;&`0qu!X38@nYRon*wK#riqx1IA!`l*41OT^>x4kz~E2 zLeuZBEYJ)Y_c2*zSr`vfy;DQ+`Mq1A>l0nb4C(eghSNv89MGz9dh>M zXCphibwd-U0GuOYt8Yn=1*cokiTp#~fxfpahF{#pfZ%MU`$x4cf88M(Tt4TlY`)G; zxuI>=4QJgUD|7eT{f&f!d2-;*!F)=npSk;?p<@}v7S(kj*6KUMj5GmDt;#lw`IN{)tW)_Qy06(kgx3RFbnnRWnFVRvhAd1 zBsSE6Lds+K*=r7ogU=GL_UUMSF7cLqcCxE*RJbN*%Y(lL$RZ^=bZR3AO{1I+q$7?G zWlE?A{Ln+|lYTC*52C8tyr#0}jAU|)&N#Q)Y+XTaO;(y9H0LdHMOZD4yNJ)beu|;s zMtVyMZssoEo7(aZ%NBfuUkaa@PphzzJoJTUYfH#=kb~c3&8p|iQD9FKm*1w?@bh%T4lnuMy)~s%bh}PoLdk}N#u~jL`S;#yJM|1*l2$h z4#_oim7Z`v)|m3TkzB$ILR}`XdXGO4P``)FEoC7wNPI^H2^brO`wZ@ftFfrbV%J18 zTQ+aB^;Ba$v81uP=c0vaUO$DxC4(Eg1q0pJy7l5O+9Q4~4+iY-!hMEF zVe*GdRrsHnzVOnjiJxsyPrcO#f9<opD%UE9nQyf9^O=zg-kOK^;01^PnlZoI*67JY)%vB zS~|V8#J^LQV04BGXJ`p6(2y$UYdL>b)J$)bbaPRW&^}-|eh)5uF0XQl_mttodihoO z1kPTNq}|y~*HLfxwI6S+XSBNy=mYQe^dK&Z&O`2NDsC?a0Q95)yZd!w9|i&e+r0u{ zbWPO;xt&v;#uXoQoq7*Fn<;UN`7#7#9-p; z_M~WSpICIXsu3$nvITm?*$U5#Rgph5OHaBc*)sMFJ#fH^7nH0EbS76I?W}T`G zir=+;9$PD_b3gTTl{Yp6+>#Oy4WEo%U(uMlqe0A*2s}USB6`hEt40MeIN9*r>^A&+av*`Q#T*ETq}* zS*33>!)xcie7^Eayp;q>nqoC-(EH0KiQc%-S)%7>hu!lNaG|pUyyk0Be$X!#Qe2!@ zi0`$|tmB7HTo5oszs>IyQe-w)GCDakVm8^OJ!5k&e5In@N(Jv>)3ridYl`$1>})f` zuG&NhpIizyf1o5{s{)ee1j34X5v^RzhA~Qv&EWEsi|f}sgpLM>WWC>2m9wlhS+%+2 z76xTf=4IaoAfSQ)g8B8XaCDR?NBgo?Ym&n@HI%%|?UZ4yq3w)ePwcY$tP!SSND>}d zriWM0HXYJ44loIyTV92e=P&aB;TwkEn^erg(e)vI;S-^uVp|9|iOz%{;q0(;Ix(jF z*mF|^t;x&=OHNs1$K+4Z3H9OeTZ{f!6pIyv_O^K=sfA;JGQWmO5VZ<3y;2EplDrXK z*a#g-mONxcNTdVX3-}5x|EEw4d+Qugv5k@8N4lb3l%Ww3rZRS^!QztdKxeBa-h_hi zyc0G(gD|~qjT_q4>jQtcR6n@_X$ClH+xcAU9()BF_WTh%4O;b54BaL>hD3}%;^c3f zd=L`o2k|xleU1zk+Qqt=g(e-UMv6Th={>)VEz7&2nHisN=F4nh<wD2t_HFtZ8(>6nJ7mR+NY6CeSat*FK_GwqYJ9N3(ir@M$OG6 zl`hNxoefznl0Sx^`+cc~B!IdjNCOQ4mXhWkLK9A^BIa3#{KIV`a!UjfD7W+5#=Ut$ zfaXd-!c>gBjcQ5!XD_${GPlGcJmg?(5NTuqyLcECJYLc6XN&QYs);WrY$s;_;4jar zum7VAfZQc6#s1X-{viCX9x!7<0=V=-*FgC)$T})JzqbxL&@G^uBX@uS18c*O5lfK9 zYoJ*Uj@)#Y!RE})u%A>@79V|i|2_xwXCSg3%{#n9KRuKBg7`uJYM(k1IoQa$a-ZaE zb@F*x-b_1LT0H{+fH%wm&Od9BA(7F?I@p~1%4D3Rl%4^m*w|D1lmvrr0D=!jcGCGs zPy!DVlsT?_NCb|3ocNeyiFkBI`e8Z7oRKL}Sr^j`vhyL?SnfhN-^nO-vRq)a1bWUQ zW6pO*j75TQc!vpRa|7#fl0kooj-F?Y)`PWT+^o)Y zlL`E(Bh|s_TN@6WJdKDDSwTVscx8pQQ!>&2*joQroG>q8ByO3@@nCx91D=k6{dRY?xV!1hViiyE7fSQ?`A%YajXL0Wd z(KXg3rxndF_>f@S2a2XNYd;gA{`Pm(Nk&OLHIHbTNp5=<@m*ukFPJkq{ym3I-?Km z6@zY{7F)`ekDO&4AQ)I^;i+$4)N37V;saTT%Qy}Ri|ryaRBd`bU;!)mN?|XeJY?xj zp+9Js0tKsg2Qj?k`%Tt-xx`aeP8SEm?i+iNxdI23qj&CMRVbHp+@S)j)4s!GXXvGO z1RB%6K@Zh_fd4>4b)9|WGL^;lGc)LRn4qK(Ac#>f+NfWDctjpWWW=7Oe2jcsVAKde=Vx6V{2sV(-WOHA|dHDlA?;Ke4eN*hbWV4uwP0N1+`+x0}l{A0Kafl3jAn_Wx?I}Ots~t#*SqqY}Yc^RDPp9XHJ=}M=04s=U4LzE|#Cf zaLCG}#p+1Gn8=9Y&}0agc5QC++}IT3IQ*^MlO9e62IeA1-F9vBUAk?~K9|FUGU)ui zrCCTeMUZ?j8PcSK4YiUO52-ZIgun$M+_W@3?YoasK+1JJq-pA)EICw{$p=_N~Krwqv{plv~9VW!#F&L`5bhS#2`!sPCwfIQhY&7KcQ z(o~A{BRF}c3hX(keS_biK*0l^s#eCj__v~JhGiMGa5xcCHs+r|kB$UhSw66hGTd@j zgzV}qfKhK*8VU%TK`ebouSU?W;XC{}DB)QsOW7YuTMwPO*}fxoHJLzDE|_W-zSF|cNjqgZ>g&L^(& z`rlNI`ur_Y=Ib#2{}9^ zi+$@Ws}DI)-twR)6J%vW)1h@^L$@18*ULfQImR5u)S%1S?nSqDrOTsr#g?m9HS{az zeaHR>1lY%Wz>m(;?c=THUAJHSZm)^sAjVPdmPA-)upyN`+=%6;CNzLqW9H;{OO}l5 zP|%#enyD1>AacCG(R7XH5VsE6i=2$RFN}W|ug2#x;o|=pmL+m$*6O1!1_37C*=K{BP{6 z8om!oLS<)6T8_0bzruvTdw&60+foO^5Y+Ql2epGlsMtz)T<|@Q&A2WQ+NRcq9hiyz zSVq3lo8_Z(9!MaN`dAw;!8+h1H|9rO?E!NYx5BU=x_~ab6bJyp**#UL{Jrl=_dEFq zGmK`ljHln}aQw4`gof8r`HyMy?sYzUHlkLILkCX@UVWeqF9(=zpN-rPJYFd{-=6qi zEpYn}LSyc#Sp#Qa)AtY+v)Bg6x-;)rAMWtI$;`M|Ix+_tEo7O7tU_c< zsc41L_T<_csgt$MWhKb!y|)DQDIh2K?<)u3fo_yrM-W{b63<&3l+PZvM9Dpd#aa7W zfqIpVPz5lq%4pG{*13MA>zm(EEDmpah*3Q(y^Z4q*em()1@{VW$;jr3w;?>TR|PoO zN+|=-=X;dKP$wNjlONhfL{)mV%xEfZE;f3H&?*@3p& z-$XYl)8m+rUn&K3s50`DB^$6U^YBc#9CGQhlM!64Qd?Vgj>pyA&$!qR51yxGRY}==hFp`fq^>T?VRhFIfX1`)(DoyL)nHz3$hK1{C8Nx;N**-)R^j zR^u)d7yRNknJO?U2fsz3E~Disnf00RXfx>uWK0uWJ}O&npHUw%5GURR#R5s~KYzhm z0`<~_&kN6K79;0ux4-#p9Y5lhjM|K?ihb}lA5LDu7{bp-Z%f1O}7l#SO(P}ic zwr4GqRQ&U7{#}(o5iWdrma__SWKhf_VUN2IwwA3h2CZ>^EpYr^_$TiTR;&|Xl>CAG zAf?5ZW>iY0iBtAcZbiDXuk$G?DT35Al}MGW`*k?CCX|}d_JbU(+_yL@sf@1~rINlF zWBx}u1EQC82?@iIp+3z-`r<(|8@5$7XS|~57!~FpP6WDF!h#2n{@~r?XFoW>(Of0A zSHTcslpv}*+)$fCoY?SvNXR|_YzW*IWQe;q-K*H2L@=*m0k}^bK+!;NgVgtToGoO( z$b~a8I}Lec&-=;{M7U23fTVP8uuysFE4;tvcj&!KIDQypnpZi0#qT6zdE-Vjx6CFD z={m9CsZeUFK7;sC72VuC)D6gz7~BJUDg{S>f`ge3>M{MJVEM;N0*BWChq1lWYv{4P z!S$p$DQQ~4BDHdeu9=l@o=`1<&t{N;Nd3xtJl=*PR6W8`MuV%j9*~A%f48bLn7s0( zV5a;m8_=sMZO82yEbipL^-Dtf3aY1lm4X5ksrZKs=HDqmet|L3ykaf-L4TGR3nKr) z+|}H}FG2Q)uKe}u6&dmi@MEptyWPuY)URiJ>NYy|OY=71Pf)xfB=P|OgMUO6GfpNA zE-*6U+*;!nW+q8kG?a}p>1z6QQr;(;Jdrv{p=nX2LYl|igGMWf20pf}i*yF_BNjAW&FR@J}^z&VCgA-Rk$V_eNM zS+qpeCY*HUh^Dz@9j*ud*N+7P30~MQvE^LGI{4lZ4ws~a(&^Y!*86r_3EB{WCU&jF z6PX80o9`{kiHd1hBsCe4lz5K`D;Ss8KeZbk?=ylwZrDHHV08zt-oC`2pv$i}Ex<%6EuDfvvj>mnA(i+atN}cGAg+mk;@Qrb# z45L=)cVu13g7FQHXFXEWuKYiGHS&mGx_lp$3tc0rwAiqhgF9Ji5g`s3(@QO)-MieO zhGMIWy-`kroqCX=5Zk0dEUH3?lj)CSF~h_F+dU*&Ep()u{Z3_-=7nte8nH5l4JLVf0;` z4~6TQOaI!-fTutw@ME#d&5hCg+AQ8dxen8?$fqkeD2-sM*V|(9gBAaE8cJCmLxAgY zpJ3dKM(XhppJ$i`?b6+$^4gVXf(_LoGF>wo?#nq7K&zXY($G?NVKuW+uH9oJRGW?S zZ2m??4XwmxG80_|n0~Pni0aDFSg=wNUXe|oqZIpoZmuc@Y8p5S;gOq)Bg0m!$H7{c z)`zkhS+aS~pXAagkA%6XTxGB1d5UTk<(DxtqeY|V2F`Pvbl5MhD&6vXmE8xw4G!+v zkYhm%z^H)VVm+@-Zmt{eu!eWCD)apuTP?qrZKXZbvn*U5b-`16&XcYAt(%#K?Sg$` zpTn*yAi&KI|Cnmh>-$ZMSgp$+_=!7&jO;c&NT}>apG}pw;>yn7Z8J~3V!HgHwpGpawj&W{*}OvLX) zw3p%a?Mz*A-gpuz+bAnuXFt$QtM~$R4vzLpk9rk~Y#SoPlQS)9q$!^gLoZtlPWXn; z=pKh^+PCMutUaafXc=No#o8X!+xz)1S*(7=;2xI7dZy81IB@NLgO@VUd9~5xm}YY? z1o+9N)v#*g>nRGQpUbbcaK=ZPOf0FEiY4vakN;jWOT+$^XWZS{& zZ;v9QPMkrHF#iSXJpnP7)M({ODD*q#1?VY&7T5StkHa+)|FH>=Pa?HTS>$});T}(v z+xEoZ&Zf^PYJ?Ln&41Fii%v{sdWYQk~g3E zp1GT71FMYIQ*>^f^zu%w01uU`)klYntrx4BO@Phg>%d5OODFhf$=-6dfc<$M*W{B$ zD=M}QiW_p#$MM!DZz;o%4Jh0Dp?*lzEJ|j?YP2N5qyd>SXSa=BwK-OKG@Xyh3jn9A zt)W0t(p%oz+Iyf3{G&e2%mz?m1KPMX`N*de(zVHhQ{Gyj*NjV!QcZ{98-svY4kGkz zp>u{935fd@FYlPQat$*S^)A~wHE(RzJ|k84sz&k!|>y0Ih#DZ@kw& zgEj(li3`N}%v+K5t_^Xp9JXv<0D~~!uBh~brOw_5Dt<}r>zVGDt^6LZeTJRKTKwC` z+GKy>1Mx;X3X<=g^P{y&-;i^A{yyvma?g1Ue#(4Qt~{!S8X+^RJaBzvLj9Ps5m7jy zmc@p^n8g`ZhS@E9z`Wdy4Oi)+K!y@=PNvL@z{cMUPneDrWKRsy?SNys0I*%H;hrW6 z=ZVe3h-fm&xJ(Q;AqsgDcL7z==SH9^|0$6dZSw!9I?JfIwryJncXxMpDBL}`1$U=# zcPQN5o#5{7?(Xh{;7)*qz~k(F?tAyuuhG_;ty=w>vsWK|eA6*r#LKR>46wF6#({`6 z5kF~MF6;jdMI31=`c^-vEZ7yYRlF8)FZqBDFHZ`uR|zC?g%UR((m;V8ODZ64x)d>v za`qvuTkaY_LU4-#;cwkPC(=EanH6H41tL}9pTGzoA%G@p8)zNt4d)B|b{U|?C}ozg zI(b`735!zJ`>3M4+QcXI_&Qik z&16$_a*F1K!E=C|1CncC)vY)+YR9ahN4bR{XL%X1zhw9VxXtmUB#%r>_H@EY3nX=V z7Q$?jviZi7BZ@I+%Zf8oXmMgsTVOG3?;O#?nx0Vz*U~)_AaXH^s-H*Rnd4_A8Fe<1 zpsVT*OKR@K zvH5bM=Hfof1mH#96&<*wM|oOd0X6x^UOH(h`2Nl?f9Dx1)-sZ{oz#-)#%QD-`EVU) zpc7(zNk8DWQWN@85^-F=@bY;`Q;l;_er0Y6-hk{#d#bL_)830QC0xy7w`~imm#OE2 zeCNrzpbG*s@g|DavEk;w_-vPa4KAY`!whMuj}UHz8~m$)>Bp#)3OwX|1npgB^4gG8 zD@AHT{z@8hOiZ|z^d*5ePdZljj3tcz(W%tV-JdGa^*!i|+)3leL3e58-}#^viz^hg z)u@!TP3xl8L?O zTL#6@{Fwk@+*@ggcZtECM9gb4x%N}G5Yx!Z5_h@6mKbgHAIecN;xIC46qIt>q#_@= zVqa&}w8?M6?a*tifu`Ki``t3|0M>*hAn&G z)v4;%w9W=kKo+GtlLy)UqQ1ICew%4ztwG1Wo~mcf4RbmzQdqAvA&5&xZnQX&YAVS- z>o4w5NG-)kfY4QBO=dfQv}-w@YOJ?e+D}{y6}LJ&qxgwBjkcYb-^)7zxEG~Ykym>c zoWheReG3Qs0PGu5PIukrQORQ|SlXnmYs`|*I@i@7T*9My&dT2UKnfJ*Ysb#}J z2)~=)noi}ge>^ACEWt=Tk-m-boz=FeI-^v)7fOLA?l=}qeZ>S@XeK9>`tHo~m_3sw zA=3}HMHHT^(Cmp*^+HF>^99GDkiw3{x4Po~i(X@eHHohPa=J#suCU6hkq%c9@{rU4 zWBB-iaDdprdkhHk&+tt+u$zS1KAu#oJ6}5I!AZ*!-x--RpE1a9+5zfL=0o?_P?>YN zb2bn;HU@8!$L^;BflmU3`pH{zPRkFI3rGVaC^ZO}2?69JvUXco=AR;kj%4c9RYttV z)q?Z(MvA5KZ-4Sy2Ec8C&oV>mj0UkZx-%uBRc*5*h$KL?EO>TQ5@8pq0%%80FU=i` zvwea(K5l^u^1O3H*~&U6QL*!Lu&zSJn#cp{PA}Cw)$;xtO|>w?ky{*ABXRq;)+h7R zxK$j@IyQ2RfBZ|4FW(qSMy8&qJPn^rAzvBTzvMt z2A|Xy46c4>S=F(%*r(+^fH@^i1Pt|qL(mXtSs5j22jHKwJ5GLp{Z9zxU&hYoY|a1u zkbhRD4t6Y-4i1*K<}B99thv~@|Fitx8SzIK)KcEh#N`*rbg)A(%5+psD3YdIZ}I=; zSh)S9;QyI1Zwd^r1pl{UiOT(NH6J5_`tLK$TsnwVE=$|J~Un~xj!Ry{Rsc0 zr%S2R-?gFenhXm?A(8(B&Usx4QrQ2T2oi=!Z=eIGOrM7agG>(ChD~?SfkJJ%EHwG{=ome3^sjF2MQXrqVpeH@k!W{o#STam@0&fks?|=hGzo` z697hu2!UwEK|sFp$o7w|ILOFIuV3Br$HYcgTvL~FrEK`HL z@XU-BcDl152%`>Xm3NT!o{tor1&i?i$Zv}Cx4hoah(iQ zecP7@BO;$YTzMs*#deB|aFNUb(G;o;H%a8M9D-AKC=W}z3lQj&Gh0+bk>^pE6h!=t zYL^&(kZ|2roVmC(?1OiMX(H#O>*J4<_P1q$Y5BbZW>1MKkFw!)n=t zb9shBm7B_3bXe6Nm8_4WbKU*20Ml(>nsm{H>Xz@fBR8-QXC4LN*tZ>Y-NM`J1IM_0 z+hF~=RV@nnGg1tP-O;cN8q|i7Yx%uY&Ck2qw?+uS$=#!IG1(|hP*5%{@P_0$3V|V- zQg*IsxcMF^ZNKh~pB+6wBytV;x|+@x^vtyu&}*y}5CV6^%$@D){XTnB7&jPqf4k3h zsNccT&iR;y#$kF5hEdq2NvQt}#NYPDJ1^Ld#UJQ^2{Z50K)z9elYZ?rQik&Zccv_q zDBse$vbk?v@y_O6sgrhGzD#-^#{%PBS6RSBsv3x+4&KrU8f+7isF06_U<*?YNE4gmh+z-%hDexp{Y^!=#zY*({Nh(F z0venHQXMo|+b3`g_{cNH+OdkwY{{nZ61g@iRVU5durGKZFky!MSj^G!Ms!Nmv6)Er zrpF^1x)NwhciII7+3(@gxtA(!i#9Yc6PVvK_9aXeWcue4!TC7{R0n5CjWHxz%Lm*; zcGQ-3wPw{eW*8@GE&ga{%9ctdQ1u8TGTtPJb_cgBr89YTA`kMZ>S&(8hx{om#Y9?d zb4zIW#qqZY1oz_ANk(_*ny5v$G)1Cx7V+@0<|<@9=U)~pfx-p*{8oo6Q|e?(UNf%T)>JhX#{l51b@ zZ4Jt$PUEooWuYyAFxFPRDEB&OFo+zikMuynF*~G3kMQ^~TF*B!n#JV~i@A4lWUV}4 z>i{{VqiC*yxfFVHW0%F#M!xyDfeia>TT}+1r-CJB_|2aRgidGXbuCSsWz^9)SNB-Yf{jD`l>&iQ62(80doXV>dsboUa<)2vv1;YncVTU9wm_Qd>JArw^jt0zUiPLu zh~XK*eghY>zpWGG06{U~N(7_l&wGC3TE;O;@2sy-uGxXiYUx%s)w8a^gR@W{eXk4$ z>Wddfy$wef3P2$@3&-+yfYcW?XQ?=m=INKEN4+J3BUy!Hq@`jH#VEi)4j(s@FTn?4 z*F?FeT<}kNNqd<{^AhNLX@UGxXndzU<5JX8ViPWOw;55E=*c|*M5GJ;^6ufn#>qaA z6V(3H2Xor%ua?+$$szGcNC&32%zZ{6-*iQ4zUE2e)Pm(hh)k2jkoY4ODqnCA)qXO% zP-tJ;El_~&fJ0AUYQ)7eX_t?V;t>j5Hk_~=*<-jf)o3}Jl^6~bJSTSx$Vu6UBlTwa z`a=G$F_`{h)Hk$8@lBpBNah8`J>};CR7mR5^cPfFnx-Z%P<&yy#Y0}g=4V%FKOVf7 z@gVJlAo!{^Mr(a3}&ObcOxMM$TBSs$Ocm4>ut|B6r><)_8R2wq-najM~l?jQ+zeM z#8EZm4>vdDk0uz(=GS=wvGUpYmx5{8!{6ihCbVH(4ZmC*5R2c-#ghMh0F|l!a@b zNlQZ_oK1M6Al(?bpiHhm}0llM_HX@ zg%s<^nC$@)jE9Of%&5@6vd~ZRbcmQ}*zJA}PzlZa*oT%D+74 zMbTyE#G=AGbs!uCtKrNA+J+|U!E(WOp?5|D!?kU#nhx5o+o-eqxg~(bF~4^!tEiCz z%Bk*9wQP`hFu<5FX?u59qCsZ?jCvb?(7KIqSE-W5{poGF_|m@dJ5_#XGUChx((f~7 zs?Z94^%8xOLmKaMzp`h7W7d{4N;9>fJNjY~<@ILG5|C?;#x<|Y>ZC6MXK(0kfV+_( zYDVjy2<(N`t|=O?bpXD4vOF`#I(&g+7rq%n39TIZy04oIfi_oee51B&g|dK6xdS=; zD)ruI=Q;knO%VP8f$!DUo4mYei5WWBBo8@oOe*4jMYj)QK$!Pr^rXUfS;pY{lGIv4 zsRtwcI|w1pkNOlPgxu^J+Ux$fqJl@%kle&JQbDj0)M*$q`ko7t+%0s#Ehp&JlwcZ@OuLcW-M-%l#^SomfqLNf$Y&qh|_1Cy-H!OK3OoCH*>`3?00 zp;AR1xfmu{4^(am`rZE<_y%XS{CDEAkpi=M5L6&i9{f={Cg>1v{Ya=IL6dam>ym_1 z&eF~y@%O6*XxlXeSNmyN>B9$`K%Q?wK#bKiCiwTVe7qN~M`l?^P3vw`sFgg0s?=w` z=O$v!ifdVUM@#?0&LZSxRZiUG^Prr3@huSFoo}oi{dr)uB_G)t-T_$U_7Qw!B&}Z_ z2Wd!N%Tkk9k4s&>P6Uxne+>k_Y3I5$;&YZSkF2k5&{7mZqr>@>#?HuPy4sEJrPjXi z!eI~y9*n=d+l2sHd)!>+bVYk?U-W|mnzU{)r)BUEFlKDQGTh<*O~+Cfc@RnQLXfqJ z@TQ?zSn4+8_cd33GfHI9yR7M z4Dd-~PPHu~#px|7zzGMd#8I_LXhpPxQG>jW?N@16x-o(5m3tuy{sV+Nu=K{9PLO^Y zT@8r*5m`3B?$_5 z8wyXx6si8U2h`GJ%KWw?T28xwdCkljR-sE04$W6~q)Oi$oTPqguP@8BUH*&%{5o7v zQELlJ>m>}!X&y#Gm0L)KT$74jHqxMqz{p!dXsku?OSMc={M&PRFv#fYNLvBqM?oV? zq>H2`j{t?x&xfcExeVmd3?NxL2rmdwZj`MHRv0p}fQN_z7hVAKg!H$DT(yIQ9BOGL zVWt>p8#%bY-zqmOZTJ#}K=?HPoU^~+~7^chCK}iWNo3t{+I1GHT?S z@wc?CKeRBnV=bweJm|doaFHy5w^Qf#E1P6Ck=qDWiFxq8QZ5~WGQPB{xJUe;DfCQs z1QyI+tqIcRHRF=ZV@+>P^D6;lqEriUFTl3UI2Qj*@b~l0s?Xp;zU8z{c{(+9++eNU zh+<`DD=#?2{1pI2q<-!XpfN)nIupfQn8yHUKy zrNa8pUi7hd9=#FB+Ev(jv782Z#PItOM1LH!*`Pq z23<@z*(qlG4+JLbW+8ao^^7Kdi2G5n@G+&H2+{Ju6M-ZihU8to2#LMh`+afFvEWLl z1~)9@pOcZ=5W5gM*P6IT?jJk6b@H>1TiQm+j~Jrqc*k7v?NfeOT3#+%ls8yM@cOpn zh-zzIUB08}0{@blAP62UG9PpR$7Il`ZY_H66tJXB)?Wwo#7$Ex=7F-*#hLMde@&af zyIjI#asyp4QeM>$lOLL<(0Oc)U^tYG z9vGLfoG5_%E`M)dhw3{nc`i+mN(2Uynt|3yh=yMmsd{+qO{&%+WU_R8jvVVzPSO@i zS*)|$pQw6L2WMdpVRIl(nlFx>SR+Xn=3r|kazNvYGyukQ)bfs@`Yap{#;tSk!P?O+Qt(81<|w8E5r88Da_vR6JA-<}q2fs) zj_TcYb|~deJP)?cn8ZJ7)UfRLYRl~Z#vuL2ZfZj5g#E8=MbN$R-|D58798n6Z--NU z!S#QFs;iTk22?=mc<%8R3CtjDx;6mX@Kl=$D6JE!*v^R|Da}S1_N8a7 z*&Fp7y0nGlh0YPr>fa>3kq5z-C@`W8BS78!!M-6-xeHiS{ov;Cq3i;RP2crBzdyg< z2=9KpKc7V65PL)R^H#5hZruqf?#@lXY`1^$Xh$7_;q3q=C!;ty1F_dT*;l{<#Z~L_QQ%M26#GG}7sh2~g^C z;)`ovhzN(AcAnsW*%wt58k!g)CqC%U$GW3kyXWN=if6eEHN?E7!XX|?hw_yiKC*9i z{*Em$(BcPDA6}F2)gR`T@Kqablkn9Y_K@(^9DbJY-BS>H>z7RXNVs@Oj39}AfrI%= zM-wI}e02O1+@EaU2~3GSKs2hVGU-fyBCFI2F)#0Q)$^y7rK_V=GYyxku^~=yn@@bV z5^|ZDn}9&MWzxAPjJUd`&Wfw{O8Ad&8e{_v#R9dUQcl6l(?oSIy$RNwwcM{tA!AEt zcPZ#xtmr`XxpBB$K0GQ$rSpT@SCf#dBn&un)f>53w)(4LhFtuMbdLLwLIync##I1q z6^+`#b#bXn;=pk`{lx}iCVDn^k1-kt^`hw~Kb+Ar>3%k4aY&jd8+M69+U_$d0#3C~ z)o=hRESo{QQmmA9a5pJ4e#@j0Jyq1b531C&qjy!D#8W5`;elnJg+Qk{2`kQ8=4Pu# zVP+WK$_~f7ECrWQd$c0k{q>zqTO7Btl_59tR9;EGSRJg793yPe2P^d;%_wqBw{c<_ zmyE?E?%)YK6B1)GBQUMoJyI11O_&9Z)td;ky;v!#uXuTtNE7~Y&GrJ_Secij9G0w; zgsY;iaM74oY-`SAV=M_VslH(*wLtq4=Vu|yP1%?kQ+Tf1*yT=#2U?Qq%Tg$zWqHiO zgEB}RV2IIweOX|Got;~|H>Jq&nZZ;3NZW`1)saNhbx^{IaS?&!G9v2HZueo z;i$s*rx6+S14kI%>GNj{W}_4TQBbdVU)krTAO2=p<<%cH7+q> zMXpiS;v-;Sfk&c#BCGeBug{G(Dg}E(gqL3_X8;?pji>o&@B=?@@9hg(a-4DrR2Q)5 z!?u#Px!A!z@eVp$ZfbfJ{iHEk4$lx&#gu)|X3v2qi9FYqJVvpe^Qmg0;W1YV$vb{K zwBsPBs?A9pp^Mw1316abVoR7O1;SmE=4Jx zjaeG^eP=k(U_R9wloH%ac1W31a7dp^3}_#^Q~oddd!%LgD3&U7*2MT9uU(au)LWLf zZ2E?fk9}{rkCCQt3@7jPHa{h1x`E>q)UwkfOf4{f{e|_r59Z^PGmJTm+cMYf^s1S{cSb=CTkI!Ld#gaF z8iYPOF=8cZK68aKTu40nRaCQse;K=tCe5m}aNOJXe8(&(2qwuC0O1X-HnZvpePEg6&z^pk`dGexlVu?kNk?){<*uWO>ssi?=?q_WEok6z9C8yLUSL*b?ug){|7Lm8_G~t?R>$+@QdH7J{?a$(3f1wOBM;Kb@ojsz* zKtb0jo=5IHVk0uP^KS=zX(Lr09f(e*x3e2T_9j_>#`n`i12urE{0r)xfSqpSQ|YBB zL9P~z6_c)#>u%nY^E4u|Dq~MO%ZW5JSiQw9kLtS%-TUIXqVgvmw<)|H_V$*q9^>}f z7o3k9>O~bOTO#GulkV_{A`G#Wd%`J5M}@aT^t-pV!eHD$^jf@N`u* zQ#$h?U+$&B6%wL|MGtS=fXde-gF?x}Vim*7KMmi?Z@Yw>ReO54k zigMW=?(D9t@>^752VN#r*Cc^>N7;p$uSSnP<(ms(Y&oF6CN2mH5eML3n5+9G+i?+h z0G0iG>#rQazun&#S#`W^=ik1w2}>NwMUM^vb~OOKF{2a@CvCbsk6Z<-3ND)aSQ65} z=DgMQayz7%eqw80MCtN7)*Lc-TyPq48fL|9vMu9p&g6n$-iI8i%n(N9Sh1G*#2I8Z zbL#e=@l-%Uec?JZEqB4?)`B7Dz~$y0xg$5|xYwJ4$W5T<=2fe^ba2stKaRC-_bzbR zu4`eSR8c?>s$xV>t?dd-+zMv_p9ep_D5;^SoIeU4?P$2VY+~mze;X`2Oc4K<_16VY zaQY58?2Wvj2tMm@@uqT;Oi>6Ao;@rCea(Uk2}!0KyzNxA`lblh=+Q*&9HB#(*?lSw9t!f!P5oB)XU4bUQaaTM za`fnL>dl&ehJiU=-^Gruv=i}nAvbq~&e$S^fbhtM=Ebc8e*CjfE6Q5kEV-kV5L#G# zkl`SYmbr<{0dQchR8vE!8qIIGAWqJ9g36m=(R@Ix^02t7nfBDUX!V`?OrhKG7FX~s z;;c|m3)Z^SQsG?kO_y)b#S?f6tUBgsP`?k$=?W?I8uQ`F&Czqcy zY$GAy>?5_skcZdD9OZM9#Cd+i9LR}ji0u+_j^HSV-8J?BR)cSB>23Lz^=ctd|68o_gB^Qr&T{0Uh6^b z(7NO)T&m0q#n3pRnCfJkkV`kv9$hx)FEx5zKf$WGV#^PtZtZb4RE9v&myYwrTbeR_ zGrk#ozYv}1Z>C7*){dHwBH+I+?Xgv(v3m6eR4IOk;sj?02OKGJl+28|B|| z%ZLhxaX{)Rkp77)$US}VEN9cB;v@zQq284+x3ZHHhb}!4<}lIh2bvPIXBuk^*~A<= zVn4x;825$QaCW1wW#+&m)h*sy<99=^>8{L(Gwu0E&vIUG+c*WQZ%36;xnxyUP?thY z0=nb8xm5wSROEwy!yH9zf5wuxBgJIw>qJ zdy$R$)a!4{QD67daVZLX%Ku)x_Wr*PG3-CLur$3*QNsM&tfgGih5n~m3#+sGr1oF% zj!rV$5?i{V0Te>h^oBq9KkqG*^E3!Q2k`m8fs45B z#)fzJ6&elHUynPxQba(<783#F3s~?Lh<+e+fX!_{#ZNrIMJdF?89pW-lR&M(MK(rH z7yCrKnRq52F*UrkCS)FZhKn1or!QC_bn@xA!b@6eWvW4QX6g5oS`b0g0dXr+=drdl z?4Ut)7FLQR;()A9sltaw)2vP=OTv+HI*r@$XHZZ(M%-{ zr*2ne)!#5YkyA<2 z-AdTAbBFevPh8GGUsBjghsuMe5iu|&)6L_M)b56NW}I!i!rh4I_NzA4wd_Qv$?=4Y z90%zBF3Y>bQ!?brI&gBza|}D{c$XR8l9mn9|L`&GjcylvE znzl1Bpkc?URcFqkEzK;OIOM@R?m36fu5wPHWTK2vLCeMuDfh*kqm3vaa#4O^NI~6yoTFn0UWx%XT&jOMiZo)5=M9#CSlrKhTG}iPX10 z@*|&GXvESTgn3Drv?fTP@`X(Lz|CikanK3XUQ#tL*&mNgO?_h9dqYHv5KeU@%vS;Z z&_G{w(4J&U#3C;WnY)8pQ2A_|$PTW(gdyj(R3zOp^(Lc^Jxk=!KuQwEJ|`7ban8=rKgju;#zdl*At7Ret2T#Z}cHe;RPmur3>|q!cxg< z7I7#~hc6vZKm}~84imm|Ty?HhzHYg|O4^WB>5p2K1@2w&PfsA$U8gbfa=oIipl}rgK}}1g z}q0FvZFAroTk-NuNI!k6|f#Rkd8lIf=IFP|60ej{{`OeL^q@ zS>`{8yW0;}**fEns66#D_y(Sq{OWDJ;C#+m7yt76UcR&IzzyxXKBMPZM0kz(8;5nl zMwj^^qAXxPJcalncv>9p_JMwV`(+>a)1le1`wbR}u+A~(C0fiUX?OYHTkQVGDP5d_ z7_D*(8Xw9IfyQ6`L4Op0Y+@acD!WH#X-8*jn_r5$5lN;^>1AuO1x;0rNnY5$D8d`%|HrAnTBMNh{x zg2DvlYDR)E#<1R{(pv7V84(8f(B@4d$nHbMV965$p<|LGUcP6DKqwd|V*5mP+qi2k zpQS!3-cSGu7AXXi_|DFnMfZyp3SfXs6g}AnufpDxkAgsRI8(KcYLe;cUay;M4_}v; zk+n}5&>!nr0!OXr=;DCU`u#h3tP`;|{VHfE&Ip`_!f{_}zhHcZ4$46tZGduaXKZGx z-OC5^rk`KE8{wJ3Q$&*R2Kh*EyHAtq@j3L)|Ghn5LAZYgM*GbOm&D{QeCQ=Yw#q-H zK&W*82wrGlNU2gQT#4=l01_C+M+v{fdw;RT%{R@E9^{$XiysD6}$G6b1? zkQ|+S0#GCe)-VOgzMm-gEjQ}VO~dK&JBkUHD3uCqF}GLjG;#ERD^Q%2m-tAIp{{oZ zY_`@;QU98M;c7BMB;hwc?%pK|LBAiBZ~F@!$OOUbBfx3BRRl#bB2|%N=M|UtV6~4+ z=QmgdLA=c5x9o^(lnu<>MGowlpaPLGVj7Ks1sdh&mYuRsP=$@*M*2xkag4=6l-dw8 z;PNv~{c@daX42g63>Dn9%wf!mJ5UXYEqfG8+>Tg-Uut#t$yR5?{OY;wKI&Gt=VWU+ z%uYj%s6=Y$#G^i6v+!*%<2nX4%lJpku4Hlf$w7p^tFzQ9pZ&A@GFY7FC$k_n$#jMy zFR#l(Ie)!Q&CL8=9t{IapkkkH6H>wjY=)`mae*)F#|rQDY~fnSOi`GoZYsxK!?$<_ zU5Np1NKCPwrRID1UK!3Gi2>7}H2X&^*EP;^bekk1gyr_!CpxWE5on|}szq4aTMWI& zX)){m*)u59Ud+#dlB=dDI}m69w+s@td5&3KgNP;yOI6C8>xWI^k2a{ZWNoHa(PSIO zc^A$<9p{jbM3YbEA~5u>=LUp|+Sx|Ea(OIz(baCU9`Zexug$|tFL}ey9cLq4kyoBl zCJgpzJi(uE8}y&7;($a?>^U?|O8)>7S%G*GxZx8El|upMuWz^<=)$0blQLU=syhz7 z;(OWdfP4f!jx}4&1;%5faGZ}&5%J20`r+u8lHs~ro8P@<98`A%-IRB_Z;FS{o-(94 z45yN7I@a~(F{6hOc*kRej$Dfw&OApSk^f7H<{No$S;8>s0e*5A(_=tZ$KEzo#}wo{^mh{ z%Tj(FxrCihGeH~PdWGj3QS&T_ebL*Ef74Zl!vMofaA-mB)A@IMtX}n9QAdue6x|Ue zoB#FE+9xtr=+Q3X+|ZTr>y7+$qPja8+LgZb`n*q&-_^{h-Qnl^*pd%@q7 ztk>FLtiR%2KNotufY0oExxS8?wfpTLt(Ehoi}d@+VT9(CEBFy7n{poi>F=M^aQ)v=qExjPf%9yk31h%tOJLCmF<5Xb{%8NOVk1!e?EJT}S0tN<4DDvi8gDp

+d;5hna}0oE+WQv#+#HucBXBcL=3J7y#(QfsILITDl&OI%gg7P@F-FdRf5vbkU9qE> z1*4;lk2~%m{JnpM7Jn_0)LEv>NLw)6K+LZFRvqR+C6?w(nozo-Dg6VAUBprS&}u^P z0o%Z>qyAddMf3()y>f=?kwn<_8zqWdU+6nI7&z=b7DAxzq+zC342vOt zxJ)Y%|9k+5dw$J2%jUb#>V_LMQm(!w+U2EC@MzgUU({92x{P1SrTfB1?+_}pJBE-x zI6#gz5LZkKETXJa4i?Gnb0HorI7(?S(%EPpUdUNHGL;yI6*^C>J8?x2^Zi#T8Ct4w zS1kma*m&`U!L!i{ITlUSEIhK}&}JhcPe9GL5ET%17{d`ZF}E8qY84Qy1tF`IrXop= z{&h;ElQ$vmw_F>g&Ra?Teqx{NCGYT!Q1p*GEGnzWb>3!GlD@Dk^x7Flx$;j$Hszud z6R{_v1$kPel@sdw^ro6%sbmxla!NmL<;z=KlfuI+JviovQEqr_7PR3 zxL7T6QLb(6omq1TXCyW!472-BbpbIwsnJ;bhcx#9wDxX29e@CZ-Q^EQpB zjZt-N{&_dB3u4KljBCDlx8R-Bjtj_@R=@I};H&9By=P7Rm_p29-*9T+r3^;sq5fb+ z$=pJW8wkrP=20KHVQ~(z zY(seQg_~vGA~$GJeK5AAT6UH`)nDmbK6tL#W3et||H&)ZJ!R3)eey5%StLd&h?QmY zT03;E+!Gq;iO4444xOB$WqX=;JTqk#AKuDuDF46yNSAUmgZ`u%D|}LoMgFhCh%$mg zY`cdBL;W|#+Xf8}#sm2e!R{V&KsoF)r>hB`Ja9ytu80bT*cN~cmhmq^xs4DNOat~G z5!2?53$_UTZz)d#776)pxj_Nu`QI;9rvV#)`u8b56IeX>KLKAs${q=XPeL9v{=W(2 z{{(!~_gKJ)L7o_EXzw^?bD~fw3atSU{*m>{SZ9ev9rxfRIq%v-#*=*SV@mqNY z?rya`yk~NAIP7-&?N4;wK-FtFdfkfY%^ge18!(aufe13XLnh>?F8kq7o(x8hf5_XL zIb$SiMtGKlcyCXm)S=&5!y)kus2NZkw0IIFuld|UcdL-!Q=_$g%?M9M-;L4rdf`HI zNUZ-t^LkK2zDsS)rWobf1xcb9;RzGpw-oCkqZQ`ib4%Qv?sMaE~I+r<7P5fjUp3});rspNcOqKLVaX$QA$%|ff3Ak znRVH=sy3t48a`v}5(`(-H>_S~sp8uvZm?uO709R2dIYEGH-HK_4`5m-#g$c^f!89D zD^@EoVyG?vIN&V@RkfM;w1<^~vJMJa8fGq7%tD#89C5tkTMdbVB*J216d z4Z_Q6QSbkukj$><68}Y$q2lwu!`vr?MAZkN#{a#_c<8HcYuot8)EIPp)!H#&F*c=w zxAK%%3jc@Z-XfC_McIW}P!XiCS<%$!l6P~P*3oZ8IT=Er8oNF`Q1$>AjBTgBsMnvm z0*5QuLMuNE1;1QuJ)+K0rYk6`;bKNz2@=2VDfQ8;XzK9lnCxGv@>kHO7R|bZ0+vY7;4C%2ia0JE2^b zx)NhHl_H=HjU$JF>vQctnMy7sry(H5F89lEY4*`>=TMhPWcfF|APS28?%@(GBgj0p zRX4Iv1rJ>VznP6?MZQr&ttkMj)1v&UV;Kzo7ZY;)&5M)a25qFaIGvabr4_{=%{uB# z6pbbzLPZm+ec8OLyUFNp!_4mHZqDxR!yz62+ju4h>(96x2V+#-hB;lMl`UF+^4%99 zmLmlD-f@W`V>~})bHYGoQ1Gd@2Ca-JuzTo*Q*2(MAfmKy*!ssD6F_+OY0rgH^dOSmB3lp<#brqNX zymaHdvHJ%aMtOc4{nUfheTkn+Fs{)Ws5%xnWE})w^~d~;7?f})Z7>ujvr0++PI)x< z5*^cb=)nFSH+ITM!`K)617yhvLYt~9!-%?WL#8vU9=k)CjMHA3G)E#i+a*8xY^Fj9 zkR8dqsg<&cVwb`#jjh;Ex8tr=@X9TTqv`)pK9#4{OHw3*AEQ&VTsLYR&#+h&u?i}g zVCClP#i?(M!ZMIN+F34WaWkzS-V3HOX52UcaNt z3Ld{kpBXc`<*ZH>rXn7ymIBCD*I>d;vj%ZG>YkYl=5Aqvhy=4sXMmMFmu`n3f>>fY z=rLyXIo9(H89m!~HLRxuGU|8EqTS;ydXjBI>6A-qZ#Al$Y1=ESo2YQzu3u`mH|Oh$ z?o8&ls{nOi!`FFokw7ZI3@O?Cl9?lAhAxWnMh} zTEw`OppF8fEyX|5DN$eGCA+Cc6*Y}rzs;>wCmVk0R8Vn9e;^zM$JU4{QKi&~$~{bv z^?5fmDRY*pCVQ3u;xRXh#6Lg{Y+%_s|MXcv!uW9K7=aYF4%Lc+b^lTx8tnjDcDaL^ zw?9L+YRe66t(sqCiqk=XtX#_v1fG_ZA(a;pqauPH#StOr28d8+2Zkg}RL3Nc5es{{ zPtV6J6|H%9V3kJJpK0>1#pzPBf@t5r}^gC1pB$o`tc!(k-XgJDu~*3BF6xTvT%@BF`+F!h>|uyP?0*d+>iFO25(qaXSs4z z?g5sGu;1GHUW+53C0nA-YD!C*{l0yg;iTamG0&fWv-{9*y?)}VrYFFgg%hG(AU@V$ zjXJ)KI$oh|p}eX=61U$1-wB7xmdvUy{Ogg1^jBid80CO&#}Xz%`O|Cmv#oA>?==t~ zxcuWPqR}76;Ut$6qu&@-7>Ve{dc%5XXJhr_tuyzk&7BHn@_MG^HiH1;%<)Vf>q9Rj zd%~fXP}&@&Rh*6T!2Llph-K}^X{Tby@cr7WthKfh#j$GQB@}Kkgy|13+(!nkVgiSMs zyN=pX)aad^2nUU^qz`$b!c9RLqiodixA%hbyHm%0;A2p`B+>+>85y7Wr}+d+)lYuI zG(6_fsg!|{Vt-T-4$a|uP_n#jAUZ;-y01uR*`Ov`dHHYyR$0aYAejSh`Y2dlD5XFA zE)h~FN;=b5e*Xv#aX%S6ex!jQzRELbOsU~jiQ_e<-|}cZdJfK4C=SGb2Ssi8*^s@v zl=*cj6Uc(#D=GRYG}RCUgW3GJ=Pn_75uuv^NRgjXnP2)CUx#~0$!t_8%b0}9`+*sr zS;K^r@tO=Xb5G_nng(u&^`euJVS?)H*kBVfT~UWkSNzZv$CPk#TXSc;xnTJPas{runs@r!WNedvN-q>}h_#q&CI(4=97o zTfL=t(;8CJS_98pyj6RXA8LB(0B3sKc2+~t;R{sx+@@$}-QpIW5*N+T=` z^zMDcJXI*E!c>&uc>Lj?V|j0b#NucXR_9QHuH}ACwka%+0rIgj;$48>TywC}9!uPf zu>0Nn$a{-}Wy{65N1OSp`XbfLcwM>Pa<>NeJx8|zA?yem`c=8~Hn!AmFC%f7i2x== z7JV_87dUS`(hNcpM%>PzBFo2$(`u4pmA3EIG05FLAkJPpfGZ9D7SHZLIYpzcwW|X|5;_K_THdMJksYqp<*A zkeE^FIrT=TLgbM8e$4D0U_t2{TMX%;7q(fBzjOzEm1^`(He;To<^g<`St^>BZ$BSBfLzCeN37oIrDB|`epp_Zh9!3{ix$|m}kR};^XIE zk;TthZgbYN4)ZgpZaGF$5;)6`zg`V4F#%*+~AzbM) zX(ym_VbT3*FUY^<=CKAnTfNB(_}NkRii8l`oE*NPy72_wI~vk5YZ0RGJ$aPa#M-@_ zZ7LCTl)4!6_#T~U55|TtHJ37p$W}SiP;b4gK1ficGxc0(8Ef{?TmO=C7O%Dd$E?Ux zt>8uBUrGJyDw34Sm33T=-Xa_Ky?57vx0JiDv3BkIJH0#LiL(>!f&bJK*eUEW zwP(eSPCzc_7j3Zx&Dn0b@^DEKjI?~0*eC_aYwJhWufGn*rUYSU4>2EH3)%Ad2oigW zRf_%vz?jQQam%cI6zpySuwvK)SoTTaa$_(A{*zx!Pu{&=4ivt}=F&7OD7o*ATUX3e4+;-lH@*Z*K`&b~6dSLIRU`#H?O*m~vH zAv9V`F^^G41PLO)tl{>Ma%k?*CzwOtu3wZNcE8y8G=mfh@Lb3@s#{D3$)~+a_|Odd zdXKyX06EMkbu;tw6C_CZhC_VckW+j%n{#!}N_VaquZfKYzuVEaxY=%Mv{(h)z}*K>GP6vhiDkBz`aR7DmA14!>;X<;g(pSSJ3tMrW?Y0=z0K}kyKW1 z;Cp_0c3Nk4%?!Q%uFj7nEFNa22vx_14}4JW*^hCUrgZ)EhWdXBLidxM;U|Dszg{Bg zU!d_?rEYI(gl4pC+U>4jvVsJ;O3WR4g+9GbvIs8G#xK+5Up#w=Ajh_h>FQ zQT;0UWA(Kola9#Mb}ic`BuN6~I(fWa`--NDrY4pR$6wYjMNMM9Q+`g(mPv}GPeV?p z9ph~Wxlh@CyA4jHK8SD9c8){OCA`NFS_a|~K?U%}`}-s7k0Dt)t#=H3@379@=&<|c zNFV#`$*hO7)r$Y3p5g+sM=is@kJpywXbykM|qYPqtp^w6P(W)~Qe>??^1R zA#2*^T&QuxEs8)EWp>@M8nb}9rrB@(24A}f)(xg7VM&`^YI6KYmNK-P38 zfyqLxil!x4ANpCigTY9h{Rk~PsesL)ErYhUC?dTr-as*E(S%HHrO5g4AGVs2loQoU zJXDl$C@&u*W=(#|Ugt60M^W^>T#;9mI|jjnW1TvIK-4Jz1(DAm{b z725e_1luxtJ(``xYD7?ARVf+TW#H!ua;tj45hX@jk6!F-BGz%>T?3CHuYQjtC00g{ z{DS7LcnI9MOl>x&17}Kv!_UN3W@X31MAE}e*@w?f7W6kUF{S2ME0V%?=-U~W%-9Km z&zh~)8$rHV0`1+8zlufO89>tEhxQ6znP`7GguY!UwLjZb%gt%9@`ylj$k9mbzoIol zndy`I6-L~dDO4QnsBap9nIn3Ht&S<`-15gRZ1sx+<)v+x@|?Q_Fa3U^8ykMsn;u*m zw0O+t&K%d)`1B-xWS{+-Yuk_8>@cv&o793KfiA7e&O)@3n8`SG_AMYw8WN4)#1&JL z=_xwRR1?e9_7%l`9E!j}2rg&O3DW(iyb;UntsEl3Bk5eQM6TCk%MHVo(IwvUJ zF?_XSz~B>j8~??tp`0PGJJ(U3`m|jxfKmgPwHNUeB8d2irAcTU`mrO`G|w#Y z+uK|O>aFkMl%=it^&y=g71Yq3LqT;B=_7BZv3gX6)fwy06oe^F*WJcb`#5^iZ@%lt zeB%{pqgi>s(JX;LXy;m2g1!j(ShQYZCEL+-9Mx!L`SXWaPtHhdsU1=+5KWG${paS0 zX@rwB)gcm0p%5K~bA;PMXtLmV1!ugI)U6M#GC_XZ8kk{q;mz^optrU}#EDy{{gAPcMcql@gyCHgLe{%R{Yd^vHb{(Le0ylH_-H|?DsqcvJN&{E;jH)N zvnIv1%(*hcyC^aV5Iw=6Am%6HcNE)n7t+?dtbu+sokVAe=f>0&p9s$+&SUBklyPvY zteL^*pI|?|A;Z2+2+8XUm%CZP$LybtEOKf$>>@^`E5C)wK(JR0G86b5a{C5%MFio7 z>#5sCPLEB7o?YXr$u8-<0rG_#S)UBeqtGX@ezqMjwNSu~9LO6Tj@p~lIN_Yb->_c- z-y(ygn3-xDi+KYF%Pn~#SX7QRYGiueM* zJ+wLyA9l5_&B#Nk_2Y-F6q3O2bkEK5!p>)TcKK z-&?=%U}EI9>-q>=S2`+O`Pqj05f-VMzG^~>UODa#*L2NPo`e60dic6BWV#O{a5LrH z;KXaXVWp?H%c%j5b!-^8n>csfk9YAF^Aldr$j=_Ob2v~2@)~!yC4P(jpnpT<=QkG; z{&M|fb$6XHUP;iCciQ#oWVESw>ExVF^Vw{5m7);{kmA{t?pTTqiJ)NAG|9T$~L z(9 z9#Bl`Ck_Hnj`GyY^`G?FD`iquDR)`&WXTDfI4z~DesyTJkE)_rgywUKR8EAjHXJf5 zGSsQbndd_5~5mA3MdD3?iARt9w5xcEMyUpeuI(a1AW5@6@pFQ|U)B@E_yvm`Zq z+)+Xq)wm6$`vvtUjd}g~WtU`5UXe;3;WKEu51VSfP#TJ>SrsPNF_F_KHjb5$!6Rj##Aq1@6;f+(+jl023G*B&ERNv<*@o<$Im zKZJ@N6h-X(P!!o!xiortBNTQaki=vD0Fu5MPs9%!wYs?@6o%rSdU zpJ5o8Ae$-^Hb(Ky1^>y=>SPf6&L14))rg}c$l%=o z@};|ATv9b6B2m*DQgVw<<;K$c%8C_2_~+(|@JTarxfD`xNFvoT4etUzx9+EWaIO7J zBi$TNS;~Ufc%tWLy_uk|-almSUx4gXdg2Sm+w2$CKRQ0sNAg$Vw_A6$r&?ix`hVUt zB!F-{G%-@XyYF8m$UC*d^LyVyhvtMsKJYb!3w+U&y~&`z znWKntEtaDWn2Xlcld`2j!rf{(7a+1`{FvD7^L>QkETApovED{4bq)=H2s%98{;^x@ z4j(00#`IH1w8#AZ$LzFND=@XYTr52aDXYRCnm8C!s2BRc!t_nCHmW#-IC3$|r;zDV zj%ldmPM<3S*&fDw7NUT|z$fR$Hxv2OjZo=wWOi??XXxV3QTk4l$;cFM*JI8q2iOl1 zcH5BJacbL`pkGZ_*CJ|EiH>;{bWfcFGA6DkHhaT(-Ny}UD!-0O$qZ`@D5oZ0 z<%ZXqAM-j`^TueA>0f7BTTn_R%eI)L$EtE#? zn^pFDR@23m@xcGCrm=;43TV_6k4PKA;GpSe!AybAO2XlRL6Ao@cNRm|#-#Z!A5OF6 zrJqN*ALYMFShYGaYY+=3n>F44`<(I)uKz$T8LzJqXP{QH7w__rjP(rtTq>IrSsBX2 z7r2imkpbbd^Cf%5+(5;TdEYVxsjZ=+5LPU%{2pC<=@ZxzhSAp2I>Hqj0=f~g3~_>y zZu)V)e=TogevG!iWVv5VI6MhnIDN#QCkbsTt|XCARq38nJ&G6m0^`Z?$GkQU=Nopg zf~%Wbo8RFl=l0*;PGe$o#u;pQd&mQ+fL!l^t7^!bQQY{|$rq3l=|`{SpN?fi`56h| zW?ee&28^UZ0E-q~FAsDTbldYG#qu{j5Bybtu?g~Oyf~b}mxT>}c6l5dP6- z&;;&vm=O*nw<|!*dn1?wHB#Hh-2AX3jv1RuLR|4nLmz#kf6T?tt~;z1`FuQjW2j8} zHf!Eh3b}e6X$s04PbmUwnGipyLhxgw&VCnod8#uOu5SRH^QpcSQh?lZ_S#!lV~9m6 z72AkEtZ#r;J4r!ZzigpuH#GL+DTwpt1 zaRc~NboS@M2_DnM8mG&~bi1X9?W$otIwS=Hy z%Ua@ZbH_KJmJdCbPfqJsHb~g%WV3AM96<%!p#B~D2V(}yY1E{Vb?zJd$x@}Itd~A@q6Y5=tV4zmEw*~&`_jyq4Gby19=ycsNT$;&Yc)&to!6jXf2fD@6 z7i|D8eMC~Rve63rd<9egPyv1vBfcR{GMi>2M!MnUs*d`j289>CKcHI}nNdQa4>Zqh>Hve$f*ucrg77MvA{~nGHE&+ayG$r-;A^W+4iqoUD@^4yZjceTx&udWRbSP&F+9(n%Y0mza5n(84Fd6y1#nNlx&qpy=T{=K5q^BjEK}6 zl|;R?UFuAM&&#rP*gM z9AffLE#Mo)ZBF!WR(Lrm3nLJKjMDZK97s?y2y>Xxd@CxyGV?Rcaa#P zYc1H)z{%ToJ7T~w*;2qUDabQhl1Eb+TDp}UYMCAII>)~jkvtsh^r`@PBdWly`;o{Q zbzVIZtbRt<9El*9fD0VvdXV z=fxRO*)lSf*XyKPG8iVu=0p$DrXEPCdz|W7uy^QDylt8>MXP^H*I8bEz2Ev+k4Bj& zgYHH|%&^zm4?|KOThe1z5X9ANH_Kh#x%JugD$8(G{+A*vncjjh=%H5#&$X8jx2YFL zP$H;CuQ=|~Ty7!`pH->;+LFrf>uCPMpHdCD>>#@6X#Yl*7Q5YNgEEnrv2u?(3}Ndp z-y@0ehRQtX@6?L9`9^F)Of>K;FYb|w?pcdh>gx}U9YZ6wzxs&93`AdZk9gkeABH^g z1dnaU(F%!GfPN-3fW&X!sc-zYm4AxSa)U4WB-R=;`qPLW>$mMG1Jx=imHP0Jp;XEB zRoaXzeM)=uVr03VR6}uJD^;WH!eTz8g1A74hSm`As>X&@Wk>8Ex8;QP*jTq^$#$93 z{NQcXRw{qEEP@Y|tZgjFJr?f?*1mdQ?RWJQ{p1KEJA3ko<^}biQ_ob{wuh6Ut0icD zDSf#s_-Qmqr~Z5SIr-`vuPE(91uZAg4pTX`&O+>CerBI559%xqfhXcY$W1^f`0NIXrUi279sph~SH}iKTI7E>^Di4v zSN{(2n{BA@*UJHKv>5|G$i!dJ|DE_PwYyNCUqd1Diqci0Acl?$_+JMY(t+NRvJX}8 z&t?exgbMh#Le~+L@T(PgK7u;@7gKqB4t4V{{&N2YitXRJ1P@S_|5^i~0HA-y0~!b@ z{ReBI0m=WiK?V*G|8MLn3V`BY>kAq{_f_+RP=7=!6o@S(1qa|mHb;~i00Zvzf`Lx+ zfq?;oF#y;g^;ul_CiHyq%wm*$A%_C=S!_l-{p3q}oQou5dFsikHol;EfrE9w(E@8FI##{~lX7|w_^-i^w}=OqDFHRB=R9xMA9pFKYz+jqJ> zHxPM@+x%T0u!(dV9H4{WOYTXV?8=yspsA%z!020;!U%^pxM#*vO-)b^*x3Fa@$MVZ z2G06-B4nAj9RtX5-#lhflGrF{;6AFc;HaQBgE&**QVPrEp!qz*yd06kf-&Bept-le9qRYz)(edHqIkeP zc1fyq7#a0B*PwuCMY2tg#!T?7C4**I3cec|JS8f>gsFZD>zTD~bhB6{<8*wEWv*qw z2OS|ud%}o6+q&zxE6B(kKyK;>gd$I2j<*J2DzILY(^HoozJyO6265i zS;d6Br%k2|fH?Vs*MNPbMzahfFO*BrREeh%a?PORF+aY$Nx>M7l*&lDU40^nx(+j@ zV48g71UBNYr^kEgi)FCXt}~}0>lN(GAeXH5F_D`D5JOy9?1&V6!a} z#qcvwtr#LLwkI7*2ANsMjF{^ZCb{`p-5Z~a8ZJcm#lG!H*!g28l??p)Eg^dzAB1}# zF^<<_Ri5^Bqp7y`q^~fOSF$D_5>(@=i34;MpE9micPVHWryLr+7pPV&yPiej zrO{y#Tb%u@5y$yY0(Z8XjGegJ^-N&iXRC&e+D!|2dEV+Xxe7lo4caTlhds+jS^+I` z6Knfj__j4kB@xpo{H%61k#&ic(b|j7)9$2elRw5^JxAEtHg$@MB>hzkNkLnB?k*=@ zdpvo<59|p8XZqwr{E|f!KlBd}V@e#|40Ff>*5F&h(hk)=v{~tATk~`oEc|d``=dOT zM7wkei#QV({f)n-KQ*`>SG^EQV4e#`kWzw%1__<)Sd9U9H8c zc{r(VG4Dm|&WZDDL6Y*&-Ua5bt~a@FB~PRbdj%2$ITCT|%oFv}Q@`q?uFNN5-w9o{ z-c?^mRE8ix`}srrRTsNlTb>flUO*d-*ItS51|F@qR>M|AOyH2~WyFHeiYOZzI1~3+ zN)TD=5w#g=Gbr6^X-gW&g!I*GtRuq z{1H{b>N{qnDa#G6AE5!wOuZ4d6GD@F@Y)ro{B7FMODMHJ^eK_2&N|A`?J+*svOUgQ z95*AL1!tKvk;MkgF){wxQ!Y|i-d>iJoG_Wa0cj7j<^G3eaOQ~C7ygPWx~^lCu44hC zr(3fp+Q2I86?@OnQT^7=DoTTaMRiCLl(rf3_#|c2*48hp(9Oyq%i|0aOCnuS%U=#v zqd|@R8?E7=@5E!?_QgwU41Mz#JJpj$lWoZTtY`EC3br64nHTN+=grkzYqHFzNHw5c z!KF?L^aEgZYm528h#UFggT04bzv{`MN z=ZQj9HgT@y?bGE$pIyXubdvQ^^95JWR1&iv&CMKqfV9vLCWD`I`-ec*@f6 zWU2eOjoL8nymj~KAh#7NA6;74c&s{ObUJ<$_JkNx(a?fsEJX%eKPq5mXy;BS#dcU3 zcopV|4AV9axXWJf9J<+LkIb`#jk6U`7mbHo`H%^7pH96DtSs8bzZ2br)Nz{tG9<_L z-T!=U#a&Z40%5aaWqS!97@2(Yk$SSE5tBn1{3EW5`Q`RtvMV$MHDTKu4mb7CE}7f- zz4ZZdMiSf)pvciVw}2Et?~jUH=EAn5Vb)cV(CI@b&0sl$0VTCmk)OE6@`~U^;>m<* zSh4A4`L44Hqa=y}%wivVo)hW0T~v~)2h#7G1!spYGZ0iXhu$AhW&Sf8HwuiAw*04r zPp?e(Q=0~p{b*l^r2D4D zx(YMg&ntFgJtfSHiW}RwA|`dzYtu_kmLHYawLH2iT$X;a7`gr`shtC zIYeA_0Ug!enX85!-|yO5aa*gb6^E#6$0K=+SqrKSpiV(vGP^h0jHkq5z+rS*NSJc| z^Gyw7(i6>P!&-p&Yb{ftsdbA>wvkjE377Xqz!QNnIKe-=d3KlH1^!E|fINw3X57zq z=cBfej4vVqL=DGT5p$Dz%1hria5I;3B=t63A_|h}&)!BxiEvKF?3SKf#+mPnO?(-{ z_}~jd)*4ZLLLZdYUyLzdY=&G@^Te6+_AILw5D(d~%k;7UjJay)77#P zRdS!PQ7gD)Q7Ey2i-**`;FXl4Ddq-Bx5o)bveo#}`mODV0!!33XNM$>Q0L4{rxrM> zh8(F5NOMXD9CWA6s|$b+Y?;vOv~hiQwi+O?36bla5~HyU;zrz(4R*ViVZjbhsOCzB zRuKoC>9@+S_6i1$(($YBmtuO2bTPQTx53$?+2E9KBWiI;1Ucz=J zipL(irf_4}b_?%jSIZHGM(gNjx~Ep#?J(-SXAb|i(|A;Ge97(~>u>a%|0wF5d%p>k z!mptGW&N0$V_kyj{h|)7s0sgmau^c})xqhF^0@|y{M2XO-4ZTanjz>;JLDp%B8!xF zPK(JeEXiHcx$Rm*26Um@YvB2924tGHHHe zqix`L>ox;;7f-X?E0!FWEhg^YrvBFJfoZ(K4_(|W4t2x>l*0AWYMYPn+Z*L3P+gdU zhX+NQlOAvd-VJ(EWx>A$CDYok)VglT^9^#A-Ub(EEHOo$X&Ijm>#ccPvz7~kB4`HR zCoa{cpSe7Zo-I(N3y9wrOf2)$CH8=c#qZ==ABa&#wtei)-%LoY5G*sip|1od`jGtL zvc|!|i4`jy^z+#Lc3_dq-ZQuNcyp-j3mU#e&HPh-!XOeT{UvvwKXnIqYYNLHw~)E! z)^&j2Nhd0{No+6lhuS}7%_Fu1MA{2*+K^-Z^}f>J6Y;oooj*2MO2*W&Ze49cdaBDC zlj**5r0x>31xZEztwLY4c$jU}Xtr3P;PiR|BphNsWZu*IN9?r`-QZuc)A^K#JP7QX zAg1ZXX>lhE_3w8ra1#&U#1{LycwTq|jWN<6R~ zL;L~9?h}7xgp~9iLqlD%_!-~sG-d~EiW)0UAYnNVqR3>{FC!tE^SVf;MZ?h5xCE!X z+;92`Tjb~47N@?AKNP9VI_*|`rz%yvOTpw#ICdOJl@Yh~NhsmMLxGm;Ai2oecQmT1 zghE%^ltDyMIOrXcWhg_aaFeTWaL%W@@2+7=zG1pEVl0G=j@SmjenP*z+j4yjpNjvI zJ*j&(h~xlc(DeQ8f0fPs>zy`|0xtehh-DN2t5>EHj31pX31KRC5&ycREfBBE;-AY` z!T{L)=jX+-09sy|(_{{825>+C3@nNQ5E_!y@Xc%9mzGctz{x+K&w~pf|LW4sONyE$ zLB`Cz0mpMd%5ZFn0}#OX+gpGYhJ+RKX}3qxcCRR1pz<*!AT*&;Xl|e z0=WGLn?)h;ufx0px)$u2H^A$9?Jq+UKQ`|1eWYTq}2hFXHd!hdtUUGO?klY*IpOnvW!Pa@5KM=MN2x0ZJr)kwO^26zVUk4hK|W{D;8C8GyCFKqwP%__vQf3s4IE7tQ4X z;Nc}{}qF>4!A=2J6V)S9c~@+SEYpup796bx6tha&JkZXg6IOkjq%^(Yf2yt z1VFM_*x|l;!vfxh1CoHBqabS}TmYir|Bm1o{4T2wVG`KM-n?OZ9ibNuA*`!(ks!5P zFmgL?PQNR;2G&wgM;8ECBTUsB0%anHf@C+qMQC=~XLbRaMu+ zg5^Y=@0+$xv<9XU>}krYzg`RC#SfOG__-?Xt^QfgP8#Q;Q|u@m;NBzQnw(e8d(z-FO^q_s~yC6+89b+cQy=BbnNKndG999q>yt z5PF+gMMR~)FRjQ>`gLep>tlOPoxq-6LCsg5iwt=xzbYF2PT><1(g>KHZF;vktxsRY zGk6}a7^lmoXA2F>S5TKh`_STnw?a_+tnj`Tb=H%|OP@8mMDlPW!8$2Js{@hwJq}gc z`@dm>GkYqz(4E*SdTi|+EjErqCz}+G48AVHbUAtlGQ&UcH0mrhI((84HpWIG%Ct9t zK|kkB6ppR>6Md8g9O8N@&#o_<`Kdfw!7#_0F4W)loVXbtYUMeRVqrF``lbUZX7Kl z39+=sY5{b$C+T!WRb^@udL?Sp^F!aDMgRP>>*?@aRuV~VWa%_^ahQX;JAXfxQldj> z8)cZCRUf>sFv#Ix4$2Axghn*YG)so#Mrjy*9)-7Du;Vb{MiliTYdO8lH@V=DS{)^g~bUDEQ!*EwG(fg5d4u4r3Txo%-O7?IuK|78k+XCLpSzP1? zUF{)MC-57e2o%AQxR1!8XPnFlaVWPJ#QR(3@Zl8;`aOJ@CBn!|1v*^0bqt$F;5cUTH6e4I8H$XHSOL}NBD1b-ElpzoHskl#qnr)8 z$*i{AFYZAz;2pMm@RvR`C`4a&*!QvRC<}P&t{3X0R!jz^dq9%i?az3UeX0{T0@D>vE z@7}UrJTqdkNWej`R-is7PaZ99FFeeVC@p@4sIa1{DMbV#GiB==Lqu;;Ra1_>P*V+4 z98IQkD?(%OE(Hpczy;NUUP1tyqE0?ZF(djLBry;vRC@=hvc?wY5sX z0ivNCmQLd8(v~|6?|>{!HEcLwDIf`MGyEg;4aU04IWUL}^D$y8P^cA-l!jmNoWBnX zS_TB4OMQ#ecXqkWyPS;HGSGGB3@4O^W>D6GLQ46Q#ye!o2D*p#RtZwso6xI282TOU zt)s}gRA&cy2AY>fO_nL$$z=G3bHEmPp9=gZd58mF;D41LpP#qI5LAMYOK~gE*8t6i z-l_R!tsva=o!GlRHuzk+TPvuO0H~k?L{ku6r|h|35E#Lj!aH?~C+HpeW8#)a5Fh-r z4a$ADO$= zX!9X4bZ^YfH~j#RLryB!is5hzSK9^}tOrUHhpg!cc7KO^;Sa$T@eUgL8H}l;RkzoO z3U9LCUUJ3y0DVftkK$%ywyhw#J&U=9ES~UoHTnD}a%4i>i}D#3W$9?j=H0fT zL?L}(Kcsw_d!n4gL8i(=+0<;Gf&5Y%f(NxjjdUKWXM??fC$u$j$Oq*5u0*M|MY`qd z0glXad9A2Ar9H*EH^Yg=df&W2eb6T$$=z9~fX6ptO~C=%*%vv~#2~gVk`)(b;Qvg&%2G%&lahXCH?2 zI*0E=cBr8fFS@E!Lt;QT34xPp!%kAqUaebiW9r@zw7B-QOa^_}b6SgPbM?=PRdDXi zighyE4GMN%`zmt#P|(QkS*)zyMk=i{EGLLHz)#qml0J?3>Dr^p?aH@`eJLk;!6-Eq z`=$=rGMv-|t38t7-X*(VDaIVCVvtLlA~EnGj3eMIUc>uTdYTE8aS-3JPCH$r_{)p+ zC}GjPT5IgX4CW_4^_P(7KPulEKlSC}qibTcG9U?`x@&OFpcffh`K}K~V$O}_ySa6S zEO=u&q}Rc@6WvNH7)`I|WVAV+aEsIqmmKXGOjy&rqYYCz*09nod(f0VkYiiieus_g zD|S>9$dRskC7G}RIw7S$3rjrhk21b(X{1F+;$QkD&ii6JV74!?RRKJwJa7-6+N^8| z_jvXwzHn}wnLqo=0X~^QQ;h#a>N`8FpmJaK)5S3uurPVlBkj?#-gTa547IE~)MmcG zf}wIrv`(3lw3zNA9dN!<74AtH7M{y^#fq;Os&R=1&x#}jO2Fh3Nolx3bFz|mudWz; zH%)VABR`~msQjTs*Zhd7S(YxDZj(q~a~koP7=2~@bB2{wb@Zh=%aq&PxICtfesq;G zf{C_Nk#gB`6k2NH`+di{osGL{{Z+acxu9{aR&ThyHpJt04r=Y;h6?YhIF7}B6J~#w zJ3&LFceIx{AUi&7gdc{+lUQgnZzTQYqUI8<^MZ~%^gn#&5j^CyoFl(*Eo7QC`2Bbw zw!7Sb8sF`cl@G%Y&R=0> zvzM(?mTdYKU+poMb7)0vxdt8d`g0B!!-{8z()L01cDPf`hM5wfs>(VkZWTy1C@qtP z&iqyEA>q1LoQM+3x!U;slVJ&M>>;ttP#NVS3qua^3W#_YLN@Rde68!($QJ3=9rIhR zY%+wX=LhFw!L=SmyF?$)FDRO#7%}414N^k*+DL7>N7J#PA9J3OyPV=8PcnvLh)uCr zueCu4j=>Lj%(183n zf4xFyYM{(tue7C|2bc=|*FPjejhc;yc-|!dFh$4z74uOL*z)Sg@(BT(5&yb?r`D7P zMGzl3ACgm+`_%;`k^vHfj=iv~2w#-AlI3VMN*jU5cD8yhd^ydUYQe>-EZ@wz9pw!Y zSG5F{!l={NXn~E@QQ-30juWk$pi|#H;$H$4CMQv3;S{>4kcf2x41JPWXoLOD?y8Z5asDkwTL0T5+ci@>IoFkyHH~3H=s9 z-j!@-{my>IGVox4W8X%ZH5`E?!mOmwuJCeC3E?2b&wD5J*5DthFxEbAd$*0~NBc#mkPkSfK= z!Y^cS&2eE-Gkh7FKW~IqQdMD4W3)H-_uU&DrKXX}wn<1Xy8vNf1}O?@J&=$nrWn#4 zYnI(Te5xhD^mG#LP(Av&OEp4Rb6>21l`=nvsc+5Da828T`E{&X``3_rTfJ;siJPSx zr{>~{f73qg_&ha&quvNfwe>V!PSQBq0(E23QLBc6&Tkxf`F_u&wkZ2C)|fS^l2bKM zNR6sPQpx2sUn|IUp~}tU{SwJ!ex2p7{*?C}Q=@=*786#vWzRK>2~Rgj#CP;cemUC@ z=>_vikEkpAnQG0D8J$MFQh^~P?#d*$8742kV3PN@FhrS|TX=-AhpYCw0|Xu7leeU3 zPT80flcZ(xncDH+S6BSMVWK+l7LE^@w`mfgm*vz~`1pakx8_@?v%}RrO6@b{CtYMJ zxXEiEskizTckS``S++&H#bx&t4kgQ^v6dDf zH8Xrfhk_a7O7Yb%BBErpkdJL;m?W~|a(>R%Aw9UCSWj89P8ia;VeDUX-e84w=sgmM z3w~a%`2q*>bJtxi{({FMc#nTyaD3$@J^0lexwAj32b|MD6ypgNqg>e*-pXG(til*2V*60c zFb@b=*$pjqO?3xFIP(g7IzF9j!(uM=Pok#~iZ%vk$!hK!!wY+)m7?A|lZ)CWq|kQc z{nXHYQ8H{={cK{-cOJH|c!4wQwB@?V^2p0-;23zWu$2FGJmDjs^AMf}@=(fBwM9J7 zZ-(l0P<;XMBQhwPYAuP91@s#fN{>gI z?p}w4f`c77Pq3l7Lz|DcA=uz0TT7RBZX$CkFH0Y``MT4OqG4i04?GNi@d4m=#kg97 zKzQ#&BMyI)iEG&wyDC~-6rr(?UxlJ!e1;vA80YW5WD9-d9DmwrPcX3(*a@Y$!yK=& zK7S{>(NDHRG0v&UC5e;OmvtLmK)7nCXoUF9bN)#0&Rk3aC(>j@t~ji{P#$hKv+N1) zCNoo(o)KA0Ba1WW(vZ2=KCzqjyJHE_707-gl2wrf<}ff`5)>1l%I9mD@k#%ubOqV4 z(f63@S$ZD|lfx%u(0vK}ZsR#rkUOcZE9@b0QF1<)7z&ZWu<$$`r5}SZWe5FRN=NM9 zUXnmb(7WMiPU=tWUEkw}qR?-2m}X^2qWqb2)9ws@6fMe!-zb*bZ1it(PG7;Zd;y7H zeVXYlaNhg4UM1;j=5vbsX)>C82~X@=$`*xLqj*&h>om(A`b*6hyc5pQo%mj)&hZ+P zaLi|2goAs2IRP%ocD_Y(*TIC%kxxSjZWtG%V0}Zg-*ECljj?r-l++q+B%oE}oCi0&8lZct6( zeCE4QIM@kS;^=Ijo1w)@Zc~8A?qu^=jYC!xWCc0h8lQ#=fLi?ALL;gxxnGXS2xT3bOAAROSYWgV_TUy+2c(ZopL ziyR1T``17_nE~xz$=`J=Aoz8PryVfo)!O2|2R6X{U3tP4$n+}u-W~Y#7lH4+fIt39 zrb<=#I0R894hW`r4^c)X8HT8Z-Um4SD&^i6I0p4f@s9WddH<8w{1Hg_x-3Z`aPoE7 zuOOh(zu0Y&Fd)ZYBX3dZw*Cl-b<>3ydI%Go2fzWFh5=!~U8xX4yfzF7`>JtUBvAhq z;l=_rU-!x}3ApphrvJG083IFuRKkEMVj)~I2^a>gB{BtA@cKY3U|{;|qgOOq0X%y}?Nz|2S0qyd(f99OGuHu6UlB4zZ?{#R-1hDxPX-@$m0q}n>D4wQ)f`6y$$RCu5L0+=dNdE8i*WUr!ArJxFlLrG2 zVzqEEb91ZGLGU)z!g+SupX|&S8@IGhTDneTwbRQ~P7Z5kXBnU@YF4*pNcuLbU6?$k zfR#BBqSsEY-AfZ~r68rai%F`)V=G7-B6qkT!gZ&ZM4?mAgV?GY{dWrU6*Fd5~{rUr1k> zns0%Yu$tTAu*zD(k$XWqOF7fpk(<%3$BAe2{D6B+@W68$V1;-Ba5fBnkl>dO3C7_#42i@gY{_t=oe~Lap?`PuOa|k_ z^k;HxaM4zxBGuzKPU6@*R}VuaYwWsEj^4vWSM}zrZSV?XDsE*05~t&@0aR*7JiK{c z%1$gN+X94&l&bxlBkqw~yC(yyS@_BM-w{vq7b`N-pFRB(AFo?JiB}VRFak*>%0n&b zv0S8G2>`yTBN8-~S&dmz>JcdEG!HT)bJn>lP%8U>w^}o|eygHBf0gFQWkJw}6+O(9 z^|`7l_5ZN-7GO~}-TyekF5SH#A>EBMN_Tg6cQ-4bOM~Rn-60(U5&|OK-7OM=B7z|N z7kHlcef@p^dtF>Rob#D8=SLU%uqODePWT9voqzQhpnYdG;mRys3ks-iWo=&ANQ1zWIVjpwN-uFP!O>sQZo+ z3y0lNz%OD938g?+t&;vkORlzTkFY5#-=8 z!!@!WFWa_7jIsu^r{%|@I(Da!MLZVY5InS$00rUaBfJ(_C91kAw@tLF0>;~UGUm@x zCCxobJ~)FN6a?plt6cY5$0_Ysfxce_Or)8rnEJ9-I(g)?_M7O;Gg;^KfV%UNN!y2{ zU%3>j$i)wY!39-*yYAm#|28tKiJOf<3vi5?ll5E5k>k~fw;y-KYGtx;|K3JZe6c5= z;S|Z5waCcLst{hmz`NisD@Yp<%bgNOC)df(Ta*q~V9Bm!%9$y}Gpswf;lK`0Ouaf; zz*TB>>RmR1DLuge&N6kOoebUICVC zHhwTqRx}BQ0*6VjAG!jkQg8d_d@Z^e#&ABDFDR5`Eo_j4?PUT4jZ>~y9vP$3*ApjR zo7-?ie=T~N*Dw^1XT}Ty@z1K*REO~D=9#f(kv!)zNp`X`Rj@#hJ>^X18nI04cTGf0 z^2xl^;LDek;En>xdZ3|W=oD!8^78BTB7xQ3nIS1ppFX?&?y&g#5PQN^Pur=qj8yuQ&P7yR3W@Hsi^Udf7!jZz-UgnE8Tx zqE)tPiCQ*;*_~TVU-Uvjw;TOr9p3c4rEU?MD3BV?z}0Bwz?4l=f+w#OyVdg`D>Y(wLe^0Oa8UId<)`VfO0<0MV7 zWlAzUGvgZ*^&y~~%Uk#UBA4gNG*tzQH2UmRUon27vhuXZMEu!!5yH*+#yeslC5%*X zGJAH_mv*BsPWe(IF08c%Nk)8?|GR6%bK}v-C~`HLaFLl}@zsJalqX+%)B?Z^@2kz4 zW%zebIg4c&<~}b|KYb<>?paL=;s6gVy0%$@V6jle ziyzrV_|=i17*Z#le}6d6vyVXngzoLbEe@Owlc{jjQ&abkB-Y9aG;w! zhO3tbxw<20ixKu^2>h8TLtfgBt+L!sx>=AV`Y~5iC)r@6nh77`3Snxru6%E__|5vG z5%Ld%V!2<;ZH}b~a7s_~C*m0|$_#vwR2`Khjl!cl_d%GS9L}$0lt*H~t#Rq)hz?uv zf3B&;zu_fN?MPPhvPQR(nPDLcYCI)*N?hmXq4@w-05pwq>$oyUh$fk59?7u{FHv}s-P(ZFhrpw4$R?olPpM+ zE)}`*R2}c_r}DtdcgjVVr$#t;c}viurAgd4S5+*o-eK=z)bpBW?d(I zJQZZuuX)`Iu6I!qz^^nZ_+ykspE3NPlYN7adLU+1D3UU}x$`8lEuf!ARFq+Rc9S0A z!qSb8)YX>H+3fcOIX?Gl7nTruH3jFz(3*1Au<_z@>>Z?7%6YYB=1}ymXf;jT616mE zyemRueE&ruTBle^tb7pRwBF#DvZ6VyQ2W`uFzp-n$ls*9o#6G;U)%mj#TwnG%f_;x zU=fZP*)Zu^(mT*gxl`?TCtO2J@2TjgBzUg*0?M*xq5G0n^x`v_t#ETB_R5-|imIPn z?eY%d{sPAhVq{O;Co%MRqyxD)I-T>$yN_oi6+-!TB^jXw?3XAcpw2A*bX(<143irE zgSV_|>S-VNbHO6ld(7Vv#nseu9daB*FXTuH!{Me-;McW3x@Y<|eZ)xd!;04uf zk>l&Q3aujel_9T<9Q20NLd2aUQ|;|WH?+k0+Vfp^ zg{mPZjZtlF(#=0)^pa6`x%Li*w*7+MhSO_nF~qKX{RZxN`o5?gVlUBDG$WRPIX`(c zY17b}m7?*}WN64|ml4cpay49-(k1-PeXKkRh*P?GG+^!r1Sty@%l5v#%z5UjT`)+g9%T1AUU-gZRf$1kJE$__jYxu3wY#^hBcu34=9OEm-oB?m& z7=M4RfbvH-_RqQcWsr-@Rm6Aggzs-xn78iGt0X^-x{A~KlNqSss8@{?%Q7M`K%<`x zt?7BmFQiD7=W80w&#iDsxxytm{N_}MnsJR%D50zdU-5T0Hg4$0dC@Cx1vHv(DrCP+Ne3fUa4twB=Tz$#NqP-$*?ZpFDjkb z70h;wCW>9iRJ{7?s0y`Rnt@LjS?}rpIJw;~&9K~Gw>QF8GgGkD4Bvxq74)hHfc^KH zgs6pS_1}fi;Tl2_;=?+%-8d)%36^ag2z`kKTLC{TCBb<>P|Nf#0?y+?YyS%ZCGx{k z3iS-Z=y6eHe~l3H-w>);93L)55=nD5gC} z$3_JXNxhW%Nb?NdI9f^pg@ok^qozI}t7KX*jn2*59QZAYNONcJO=#3sai|tTBEXWB zo!yIdJ?Llu-Ln%B?gL&%E7o@H&W3S;0Py~DKyLa$?%nMT+AFO~y;t^e`i00JuElUG2Cb0YL5kKwnhI9*9 zbu0^%qgriv+9PcLbX8^GFdhehpguKajm=p?<#r4WYsPXx;Ph0D4@_BlV*#UpHG*Qk zHO;qHw)&Dv<%0+%&eHm`mZ<`S+IzOyu27PO=B*P+HBuMBp>mzMsiXw!)yBD|hQ@a1 zj8B-$+H6&Zc627mLmTdWn5q|Oauon?HfY>6SEpx3PJpAwg~^xmr+CHA@Xds5cL z80*$CjV7CV*j?A<%uao?*#Fbk??sOf zG79O=A$H_S_Kl`)VhqEZKp{85Y;FB1$SxorpPxsC-{Znl*K*a72mR+Oh0W4(yRWN^ zP1NO1TvHW%z#-5fB!z8J%eNB<%Vseaq^vLRXG~BnwHETVl;Ji+dkgP)1sf5^0rn?Jn3~TZUOFdIbYq6)VN@C+-BJbg#{+F%+~{!Ed81g5{Mr< z8ZqIJZsaTUH_u)(A$G-ZXn#KcHscP$X+G&w1XXexd?LBDtm?;y2ymT*%6wYIwp!_R zWe0$M^)MIaRC|@BoWTk4o$4c4LiD)E`#d3nAS^NSv~ld@I#A27?I%OfvKp%eQo~Un z4+vb1U7655jXr2^d%sbgW9FN*$x5+g%5~1w(RWJH`;21gEC;K7^0*g|gScMwX!F20 z=cL~Z;uEwG&ITce;3K>#KPuP~fFPhFfpTnoLpBK@IzcqYj1Xwh7t2)Tj$@buL~k&} z24rmMo4ttvX(`>5(?$Pg3K|tUxYK0^+qWWE&bt0NPRKBe*Q6sUbuG5d!VDo*XpOP< zpiBg6GKOw2ZcRy@QUtJgir7aQ6xHdYKwkGt89w#a+i0tzb7zkoPN53cOokU_+~^fn zX6g}+W6BqfW8M^wV{9@fr}r4I;bs2Bgz!}BgYReNbCbKw z^us=8V#*8O7dumOXRc$)GJXCm-r(tcnGjs6bg}(B)_gIRiba9~1hmT|o}G!Zb40_? z>$(W|bV}(>#R-nkmyEzs>&o1nAZ2@o>=kSeijJNY-XrHqzmACMuBFD7r5SiZWYW4m ztxLK|j~7^wEQGBSv7hFxnil?Srq`D%q477>TP9>eU=h`ZXouY#cFfbAQ1H4B;WSw^ zzKX_2$9*JW1dACI-(-?~+oAT1F(k%p?ozk0S$r%EJJP<*+F`bEqwi~iZrIZ48o%a_ zP_YhBccll>_YCFYoNN+XR==Pp8obPpciBpKx(wZ>{dRbF+L6}q&9?mPyu@7t`TeiJ za(9pn5~t8->e;gRoX%l>&TcT@X(_yNilFj0alY@tzlL59OOubkPoQu@nZ%kzq2VJmmJ1R3wit*6e!ZZLp zxqmNB3%w`?;zEDY13o^8ZesxCp**CFxB!BW?$sIm04Bi0bU$X;VXKV^2X{evUj;q% z01_x=8SJRCQz`6B%U z2j^5$5LC!20zX{H`XM|7_P3?pQ~W+b&yo>5#%RQjVmTDWyiVlf2$ z^|jr{py7;$(RZJxDm=LYf(ZP*Fy`4K8rmH9hm%tX0#tZ6N&a*z3oi2CQdHMw_+3{D zyl2iH(}kpfM~X_BEv2-~>V-&*-XdDZYFahPF23yHq8)x`&%!m}lgTyUP;`5nG3&k> zNa7G}tgX@bUGs2n_p2W9@@+=I*q1oolu<{i5%1X>M=im*(b*!)&DkOq<^!cY)@vGT zUfzajpS7Lit-+w(Zk@=f6y9Hq%m;FK9?xyssF(|Q!OjAA5Pq50bOuQWMP3N+YnL|+ z<6`Q&sHpcozeA`>}t(3z^i&Px*wDqv+Slz)6 z*|;cl;~p-SutxSz`6ANCVOd0#4*!h_BgQhWX!I#r5xt0gQDPVlH922omMloZFF3H8 znZ0$F02~>I|6TiaH36UgPv1gi8mib^E9={(&5OtC*vUsP*KIp4;a3<#x-Dfbz=34{_-5!pgQ+) zjcU-~uUe*ztBW!;Z=i5f$Zk3>N8owynze1~nrt z3_xl>?2<>?@F|Ga=Y%7wO!;&xklpZ~&c)govhop}kXL`4_O{<4F8VU@uq4dnh;cjB z{Y(ajC^(_E=P0hom3q8CVrpr6!_1H}HO%~lU>QLBKHeGoO6{WfrBPrqla(sdPuUk` zudjmS9MZDP+~K}=BvS7(E43sZy(A_q{+#ERJVc$g5hfsLfG5W#Vy;R{I@T_1u9_c* z&!R44Mk`V-o@3sZva2wW_525V`p!qV{0Q)_8+w)bv ztU{9^-;WM2!Yh zS4y<5kp{no7}%L}O7O3ll+2#bI(NgaYSex2hEkPXBDh+~%%@45->t(@ozqK0ZQN7W z2o4RyKym%i0jgu~^pTE?^OJ9n;=Yv}V2&Uj9bSAnSTr z)AVd)PR1^c3sEsHP^3RPcuS4g+pWB0ZQ>I|W&Jt>tF<6njDtLa{z(1tRu6%QTBbjuOrE>q^mnZGW zbwnPYEk=&W`#R^|D6IA5p*}2=EU&RJiH+|-kt*Qxk%4*W)4>wO((Rr>#6q>h0LnyZt7Jsc~532XE@c>01 z6%My}|1i$ttB%UW&{XzYwEuIZ_-w%7@{9e5o5?s4f3_97UD_Yrx5JX|B&{iK?PKPZ zT$`65fgv#p>JQ&%Wvuo)kh7t0{#=5iEvGDP=|&M*640mRUdlT~UB?w-tcKJ7^cF~Q z^O6dJNln+8P$h$K;y?yY!+%uZI)pqIQ}W+QQ_U8QQ|^8y@Pzbon+Ei}^AlbQ@Kf;j%0= zp7F9ZR0dzTVxuTjg@M0#BMFsU@m^OxwDJLx40|dEO5F@}X8WN;iu&HcVke@sIZGZ#=$+aEQKJjjo zSFxQ`4dWNb(;byC8wBC#h6Fm}>SrN3_Qd9OW>xcVcY?6Y!XZ2jmbJ-d?;$LNcCAEa z7y=9=-&+mC-VF2Lr#&BPq@Li3$=B(1m@M4%LbmrMGA`a+wVgO6xAvm|J5QZr_g<&( zB6M6K<=Ye6potEY1d-1>T*9->p7I-<@=xb<+heHIhfa_D!taG*o9pIpvN$kOb-!#x znG=p=20gb=`zVh=+h_-KHQ^ENl^++IKV>l|=?s;|Fp-Leyb$Mk1;#r>xNK_8FhlFr zhUfrKN!>ubTIloKo?MP#t5)7uL~q7wQ_aYFts$hu-W(lVuh7i>u-A#!D1#~6!Y=bq zIV3^OmaUN?guygzp_iOLM8;3?CQ18L`YXdOb#Sa(vo=v74xo1yt#9(_BXZSv@pw2` ztdKcbEL!728gH`nf_qLCHnFyJ3gUW(^MGe2<`URTm9g5){CG30 zUNZCm&;{p8B(Ea zW$9~*i=>^`vMNy;r9F3TU&hfwC??+}@AGUMndBJ}OaesOb+t#ZL5(ymj4ackbHueXeD+8g+ZZ7&oZO)3iOe=FJKy(aptw`xR_f9?4O zU0@Bwvgz~3-xVw_yrQ@6PyR$?K_8#q({9qpKaXLptL6%Dv#l#;?QP#SXN>PBk~iO? z5|uQd$r-%)tiV!vx_o@&@RHE)$rKhT*$Wo>bzfUei-K||^}vr8=|e&Nd)R*ZSQGq` zLSR!H6rwLXDxUt!@|U2|HEbbj#Hg)No)sktwT@y2_Z;wOXKg&gFZ}*%<;_xIv?P`F z%7Eyt)i}e_E|H^UNpj7cUh~y;R|~Zcw{?E>Wf^(3z%Tf7UxJNLDy7cE4nE7+g+Y*i z3?>EX{0bk#voq1#p(dcBc=t>lf8aBD*exdcIk_e6L-!5LNFO5piEZIcXBcy3RViO;x#_Q1tRh32drnIB7z_ z-dX;LGWio94pS|lD!-7yxSdxUzh;@x;R2Jee-gqD>oZ!23%ZuEBl$=p{-N`@u%A4*L^j1lS+g8RP>|6{8Yi@Dtw1867{s4ury0sj*%}9JoF@`K*PNL z-I0&Kw19b~oUAMF&{3YHEXS|pG+ay%vaaW3;62QxuypoqRH4nHNj^)GY#bE6n%mbX z&g&&a@2Cw6Saz_5Cl@q-!KF+TJLVlOyCk@fI?dPj$F5zgygZBPJzSmp!aVL`t2y*z zbZt)ysdp`GZNS?8_X&#<2F5g1=ci(0Mey%l{51j8TkjJ1Ys$;NH^MPRn1FsK3b4hn z2rYZ&CV5z`fo!f&W`=n;h76s21Ht|0SiI95S)9?%P+*vq8hl zW;yvuA4(2czM)_@>HB95?Wd>UOcZ1`D@ZDoyd6G9ds{!Ez&$@(FX?@aFGf@_Hup)Z zv^{>G|1M{M+fkvro3mZLR+xI=mjCES7n`P8uNv~(aY3{vkQb({ys7Rt9lVbFfC+J&K;en<> zrOiBvcoLJnHH{9`-J%mpTWQaENf2vmqthPZBeeFQi@?$$QQWw7tGvV-1)k13(Bji1n0er^fV3u{%q)u~zrwnHA;NMhP(D+Gzy~54$ z!Az-5W7tHgR`6B(G))op9mGxBA(*#7_GJLBDKuyOuU#XNWsRm^pI`^a^IhwJbiR6NJ4^VqR%vRlIkren zNj+~l%lYff2hVS6*_wd%a*qgo{Y6#2H=Ap)SabU&%PU#V9^b0(JUXveGc#qL{Q4H3 z72wh7G3K+je)X~8F6f2^ZpI+h;EA<$)%^I#2#4-IxRQiKt|ah7g^QmD4WV4M!AUgz z=?C_VTf*w6PxZ`pCREy`uVl1iF#b4J4bVNRa7j(kjvQfarvl=c{1dcFNa zmA9kilV$`hW#12k&}n9{e|JMkB&}re-w#2ft|1d54hf!X?zwGX-D7HAamI69-PL$o z2Q4o+qRNV}`Mg++Nma+Ze4&(xKE#X(6%g$5mn90RTL=aYFUhCb-BuD6be_I|8rY3_ zO)bxopW53-=FjD8(vUAJOAp{F1tuNSRI~a(=Mrv%Q*3DrR|ieSv<%+SJ(*IgK-Fxq zi-$u&%hH0y@nI=Lbv3{3`v%?Ojvwk#ts!w*;ypyIo1;2pn}!%=yRHN! zko>$;+J8*`Kp)pBb!uP*SR1@lo4q2mE(hlVuWWr1FL(X4zYj;`5bgREzfW~*U;{KJ z-hpZSMN5xq%C)riX~iyo1_ zo>~;}29B*=G5@IBYaY_tryHII7BiiAITX4Gge6MnX2?!xCX410@n!7Ab8$i$4W^DJ>&;H}G|ABf2lKyib-Q zMGNuU+{CRV`CiFldVp5eMc>W>Ay%bdEA2;%ndSE8CPX3Ekwxqy`vWsmEYAy1$D`K7 zo|2dQ7w=ffj)epbnh}@Ya??odQVeXx z;*L6EQZAg4WFWMz6-47Ucq?fBOuVcrl3dJ9V&%|4`Vhb|*^) z+Z)YM>PEb^s$sk0DDKs-e$f*J)&jYxu$lL)#FnZHA|$=i2~p?yc>Zl{18;K3pceCb zOJT`^TMGB?$_s2(p*L)JN%mW2AtNWcBcpOJ;f^AQ7+EQbwR>8BS~ zN8FYS+Yjtm8 zt!tK!IMye#PkY5_Z5nD%Jb5ORkQ@@W9lAuBUuT=)F!x@GxS`CCu0!rhJ?ZvT279FpKDW34X0vw%C(#+ zTcUrDPx@tE!hFe0x4R^@wo)_$dQhu)m}+sEYt@PQ=<mr3)B?2$^}>KsgH)IRvj~ z@jiY6_lap8r|s)wS0ON+*vXXbmg221X{`w4L;T`4L?gBgqQ0?#{|E^(@+RWzBAK^T z3|iX0swcd*=C7bb_wpS-{5l{ZimMRGl1?NyHEw#aV6L9ecH+nr4|I00cPq0mi>J4C z)86hylOZhO&RN2yhqL=|hVitL7!R?h9?`lynrH? zx8Y1(6pfV_IfnfTNPAS0l=62da~RTyLGfvq%Ll9fbKl|C?J0vlFF zIoKM=yjeFVIL4dx82j8JV!5E=4ll|sZdQ2h(fQ5x*j$O4)Be>_E|yU8lbm{J%17%eF;y%rTu<@9Q5S($5<*-4I^o#lt7;Wz_hzZ1(| zxZ@JBC#>3xzz#NP2TP%%zqbiL6$5Dxb?CF>=w&EVB_lC05;i~_;b4@FzA9FG@kSr1 zj_4YagS~`&vxcV$1SC!zEPb}#0Jpu985w5`iRs$4?K(pMSEr3-E9p0>pfGFJFBc`z zxb#m=jPlGT&=OLWy(qeP9hlLsqTG%${_w}M|2%Ez2R9~_5RM5u@qBNb>MJym*R$~k zR4AEP)ecb(s#1n2A-1E}qrc(ew`52ZOE8N=_po9}&DVlF2Q68Bd21GiPHMG4Bk>Y4 z%H^2wjG^e#Ey074FG9;`KjR9gBLq6=9rF!r*6)@XV&t$d@=b%tvVAaBt>aGeig zr;4TIgH0e$K!{cmpIu|o)j$wS*`c?IW}1+eq0{G}wP(E;Tr1SQ&p@Koy&7D?)V++L zPO4rVtIHhp@xfCIP_TWtg|}}XX8h77r;y^(W-D3A5rh(fbp%BYSNP#TkAVkcwBPwP94N8bWFo0?*egh1x+{f!_+$ z#%YxdF^~&eM(%)Wugh%w0f@fzY?)p@u1)N=2cG1$2z@yYtJMy{N@$}fsiAm72NXpF z6QBk@4MYtL?S6tQC=Gu?<4P2wg4>#@jU^rnzm2W2Vce=5(hG=t*#!S<@*}{oth+Q% zEJQ^j8~zubFPuDqFSb0sFY*wwFTo`XKLJ1L4+7t`ZaZS_4F%U~c$AZcyroCaMTiQ(#xkIoOM4-)ZN?)eAF_N@ZV@pMd82EyV8fWBR{2atqH5PKthfYSRh?E27pC( zOb-`j$f{C~-D@%7Ns%2bg^&Y3_Q0Qep+)0f!iB`+T=L)7$Z}8UQ7ixvMH1Oi!W1i{ z7qdBKRcwPC>5!(4dBNbRBWVC$2%nH@WhDwaP6SQ_9x#xR_$yg1ro2z! zkfwCEDf`5_e_(HCNG)DhO33RQIFB1%dfolF?W6gDMHh;_*OJY~Z&_bbF^fju@D41Z z-7XC*r1MMyLv2y;Losrl?@+vFF;$$%EUc#76x8%Wo$PIyK{dIY^&(+>Hs=kN^s?Gz$`o`to{$f8*8 z3I6@T2b>pHN-gTv*yddm$cxr9mDst5!oPkrRdZj4t4w*lzif;i2g~2^r0CBroLc0* zl0GEKOflBph?Zp6dLE*uXrE&$hQ6a>Sml`b=Og!E@bkf7Isbsw=3doou{vVL4%tm} zmZUk>_s8Kqxnf5WxGl-?OB$}&+Wy}lZ@fzLYEO!Oy**(^s@9+X>F{zcuQFZQY6b=OpD@F#Mn1{w zW<6KYCr11hPabmx7Fchz5)-ssr~nQuPwXd$yM;-JV~!(8>WmMgbq@}s=DqelZCjXP ze9rx(@+uREMcp+YjD_!?EfwA%?i%7+>zZ70*JzbLtGZJIp#nFRAu?`p<1@aLa*eNG zj8@w3;24bzf3vajOmV^9b?I%xT>qSM9oScXqmN&6UY~yPS9bfS2K*{1;DLp9t@lzr z%Lfc{PxDOcss;E!V@pSi`DVL5r>}-bQ;ZiaT#@#Qcp5r%N(Orb#EHh7G(VB$GSTI@ ziPu zDPi@fa!s)X?H=6pE_U1U5`LJqC7l)S_y5#w%-tW%eklyL>m-WtEGIgspQ%+H##c;p zSCkhq+bfOj6_7X;ef>$TQncuYzesq!zC`RK2}^qaNsYvdD#;f!`Dnv5=VIxeQ->%D z?O0z=%BJmGyNzBcgmah-(fIVpS3bMO{YnGOkj3xQQCm;~>Ta9h@hT^Lpd{0Kn)CW( zK62%KD$<*Ca19P>{=lt9^lwul_nDk3A-gXcH3!u1iv=}H*Wxi~3`hqjAcS%tl$0wv ztd~x!jaCVkWual2s=L%wgU-C-GJ__CT1!1u@*dmIc@N}&nEMRV=)PTK?%*9WN;;C6 zIyuUz=V&RNhE97UI1xagUbuCbX83{I}w@ISaaRw15+xvMkq)EcVObxueD~+uiaEZTTbdKmO=@?o?h&D*tEA7DNV!Qmhd1o`1Fw&f zGZy1_d>#AK5WzrUHvn7Tz#w%kF@*|wPtqmv^$iY~p=UrtraL5_eGO@<_qKF_5dRxY zf44gM5zt3N(bSZ~VUGDpiWfxh-#MIe_8vR*!?hROD7&UWyAJ}y1U}`k)%>!YU&qDQ z%3&ilni+`II9GpG(Y)e^kh8Ztc8Jxho`lS_HU-{;en*jzFMO+RV(gBvf)MvT@zCb` z=PMm>uz(M#`y8@BFi%x&NAhmMS?&$jn$v4~hK~*8M(u9s8K;xa5i8H??9ju0;yZwP zP0c9lS4G*ome1nYX~yxE27@bs*Pl$L@tJ}BprSnLy&tz4wG$4Y0acZHihz?hODJzY zTvm-N)ZzP7Q5RY(_9Ev9uIHMYkcCy;G~UgDXL$6PPPX!l=w5z*mmoalEEsAV<9I&p z7FOTs-dTUCcOK(B7mm@Z5=G~RKDreUyUSTbH;he`lyHt@!7=%dmT$ zn>TT8O6g!Ae1ogY$J>B^f4$zZF3stEZ|$x;P9H|i0AZJ?4B=I|o(d(sN=R4f?jfTq z7;mU$+k;<-!vXbX4ACc~F(6T-<XPLvM;imRp1^e zzZD6UKV65P@+v%jlz7%z}U3m0R}gPVD<)wLtrmyy&^ z$t#Wo&kYj7C07b5o)5=%%)aw74AahY1LJPII}hLP*MPanv6q}J=A!5PdGEk+A_C_< za)UH(ja$8KWAr9fl@0tR!fsPZTQLO+Slg`W#=hIED!3V=YrJ^xMS$Ons?tgsP%&6C z5X8*U49%{iNC$2WnP$Ek!eIgNuA zw_ot#1x8Yuhi&KCuAe2cdbf2q;vH|A1Jk)bbS}%ptdirllMSto>Pd~S^R_QY1rEzy zNT7>4*(bP8L$hX=+DgW~hnPOmf%#jca2Hx)de!^(DsF`HQBQZ@&sSatps!}WKEasD zH+BjR3VnKh*o@sMeuLULvgnxBcT1zFTe=vtJ~)|H75XO#@fxvbP1dr8SBTWBODpr{ zQ}*wQw~Mj^G(%J)eIvu4$GdX0t->WS`9D{gKB3{C*;T$g9vQK>^x zSVP;K`Jg9j@VMu3TBOmnm^;Wu_t5M2KvVO+*WA-06Z5a1dP-=C1GH@Jih4RVk3`oO zEt-WCmfO*PV?_x4Zol-_cUTA03tlVV&?w~wmF#!2+$@UP6v`#qftpvGzcg!E2B6Ky zV&T6pcOF*s`f%O{A%omv^F?c|Yn$A{a<R?Lw)e zziazdaNFh2#a_f+)t;sIr$~7-mmeT!7*KKx6G04LPD4&B#=zG>kzg&us>MrxO$P55 zGbn4ruh&z_EXFs;R{Gh%ne_`k!XI zS%yXHx%c91(N6+=e>fQrRY?c5J^FZJWB@oGSqBu9#f!PK<8s>DIcKx zm|`vhh&-koN&vqePp4az0Vp16;T3@6$COVs;O8UPr3O&(NPAijXnK4B7L0CpY63g) z@f8tz)&pQ`!Ds^bKA!W4Xaz(+q6ympsPHKF9?#GzvI<%&|TW$bRI4rvSK*wB%{P zKj-&vW&op)O);_v*ni|UF9TjbHj2nLfbvnE=^kL{k<0o8K>R3A_A5a1k(+xAAbX^B zeFI!z-|O7e3QTy&09Awsu0B}7f&c_Q%HRP2Zyu}3t;V+w2lf_`5am9a1Tj<<1BlsT zi3H?*R4NM%823-H7{KYrAiykB24G>>F`OSy9zAgb3%R!cuV(D+df9U)NTF%?JzbbeI&7zCVo%#4V>I57o_?h8lp7+@Hd8KdQ#6u5=) zV8J3Iuy-q@|w)nD7{#L5~Z#{8*2Cg20(aUtdf~ z;QV9IX6S(wkPqYjvZV^s_mbhTG7F%r(m-B{dlDIS-`aB+>HhEjsL{WlqL3YEh%`|8 zA;vXySQ;kjZ!o>*|<@pIbKEi#}Y&f|4fYi`GFs@+2KU^CZ7aK>b{{#A0?)^9R zfBwOSI~Hq$s>uK;DDEZwhYm}C{`1d0=*v9_FUb@e1zITsM1QzC3fe9MB!Q=ZPkI7R znxp|w1VxbrqEp@*W!VfLT?HEw@UQ^Oy#J8{2Uis0hme$yObykQ1yVlB2$ThqK0$&J z*`R?+K-@>-$~_SUMtlj4di{^gIf3*RZU!FcGYOi4ei~2aS>xgpLLkk^d)$w_Bi&Gz=XId%?~5SB@}D zSdOPjkr+hKcMAWY-G{vrBY+F@ftRbVn6 zIVSfU0Nw+q6NmUg`Xi_Fo+E+(!1;rVdv7o*)nga^=#+KND>hejrsNVV32S^5h$ zIK=-g682pbGE8!hqK8^30T~~l#MWBSTv)|O!}^5CU(i=%&=6%HeUkK3V(78bzjg3O z38tJs#XXA+Dx(Y}d62RlJDwB+lfny=V)&PoN&0)J{R1kP7_yUq1P6zS3oGw0)I8%o zN*t=B0>piQBHT6M7{M&?fZ3}17j%U4q3om$E?j7r3XtvrF62E}F$^o;7*@XIU+`R> zf8oSXaOpq2fkf&vX#h;}573ik@Yy4oLk&h& z5%~`p3mT~g#CzoB-Sf;P?;9}br8pZDTm9eKqg01+rlcP@wUR8*I2h-_fRfGcULr8% zJ7A{^h5ynhMeZIA-B1V8JRl9fO%g(2rb1z8slSjcN-*RLD6%>b{{fjD8v#;=)gcb7 z7fAetBv64NrT$wVFRS^ANn!O-2Wy zbWiQny{9H&XwyNf4gaz07*|j23G6~V3cUY!l+GCZgGPczYyH#eaAyfKF)(d!V7>cq zjcKfs%xt)!r&>Vr2iu-qltppCWa_~l7JqAu;{ZcS-`DK_-w34oPd(g^z{21pH-9>& zf1lZpL+soGyF>t1ZE&y(`Flic2Pf6}zkssq{M(!_bYN!tgutkLP&XX`x-Zup=nfI^g|2%CT?{$Ex>A~cM6eT??;D;9L{aeb1 zd#XkWj4Jy0oWED~YBp491J?Ckz`CB`e=73+v9ePE#W4W#Kz;Q8vFhT_74QcROd;|5IOF33-HdFsYX?sejj(Ro6XK;n8kW1DHl`Jy0#u*cBLzu+zC|Aj}H0*N2sHR)IyyD<11Y=jH^1t-P+7mfq{ zW%}=r2MjZqiaPiYET}T}|JT-)fY*4eVJA61K_o$Bm6af|m7+96V(C?vs*AQX6%lLN zh?X?AEU2JCqADXnY_SB9h&5tgT1)ILx3MIx+fqAKE!w*8H#6t|&zv~Vb3E~V?|j?L zH{1W88IvPRbuu}yI+HC4^(g_`U?hWO1gOWZg2#0gjDFTJ4Q6%4Z?D@by3&0GeuIa} z#lYvJg*9?TPR&G}f2iQ2g9JV$Mss6xhik1xMeZFA-S>lao)>eA(Sii8YYcd?Ep=W` zZqiM+WOD$r&Kos7P*KtPbsg%?{6=U^IlfEdeLZR;RMov~Tl(Sl0$iQc`=j?(h zYtXjBltLG}tA$+H?U6E5H-@_ny@o2-fmW^N|IdB(LhdYot*o+LKelgn0k$^+UL9t= z1_^xrD2%5(xz3D2gu4wPa^tLlruw3n<*YWN!CDY$VL7s-(OQTowTW0aqZO6|dpTNb z$U~#eNNxTEQ|iU7a4o9h<$@SXncBSP7UEKA=5=u*XR(80G#{~g2R z<9>1533Y}j!EYRAxN+QpZCr-P&@^uK>hKQ+q0WY)cGX#U`zc(FDD>l4Ih1#pI-_JD z@E=yeUz=)$AC~|nePS)y6d4P5zDUvm-eR!cN90qp1o>sG7A(juJ_fmLo=$dPk!i?K z-#BTrapk#fR}`3GRR|ZLyYb)~SD)Lq_V~qCc-({TBMgZC1O}YWwc>rW#N@s7-Oow*tC$yynB9OFxc%_Xb+wN9buHlyusCmY|``=TkZUw`=>f z@Frl7(Z{Ikl;`IO+LsG7u1mJf&#+5EH||b$7H(16blDI&cLEG3Mk`RqfISrg-}5ug zi?5gU`<3CE+E|jwnZC_|uKb4$N_arC~Lkfbf@F z^M*~(0=Zw8$GZ=D4R%JrPIXFi;g&ATopt`nn3WRyi4g3^hQ5_=u1U*yrtVuBfaYb9q+*6Z<+jASuZ|0wspIaUX0vc@<KO2Yf!ElH1g()U`C$S~{^k!|md1P^$#Gf$`hO0< zp?|CZo`0HL*$@3RM_!X>55)ZKiNtvy1HZb^f5Ki%#9FrR3sW6sp-iEw=y&{7Fsk4V zLZZ%yBOO?qM6I__o{|XV6*Y;1aT5PtDQxlla8;$$`dS;zS2ohyTeuI(G}P-t9>SeRqm#+sJ?FhTtVLT0hHiCpX$mjk&r+so-l9R|PJ_zWH%X2^ zE1!nsqSj>Gb7w1IcPK%FS?44;nfVE5}#eXGVA_IrxmZwq{{AmePPyyS38yWZvo9r)#~$5(H(q2@cr*9 z{y*;{p!Y4%xW-Smy&4+#4nf1XK=ZXh*>Mnd6RlJYXk|YkaN~4KKo3oa^-KE;GVb%M z!JKDUJoB3Y-l+kChui#WusEHUM7##?3Et5e7I9aIeD-}o4xA}HygcRZ-R9^5qug*T zgwi`K98AyX3tDCgg|u-Vj+Let7K2n2Cuahbx=L zwn>2PG^?`j*_O?kt!YXw(!-9P%Zj?~N2zUyY_O$xHTSfrn%CsgSA-F7n&#S zy0h#rG;h1~VLB_2Qy&oZ>L@|=pQU+oY8d{o`)916#?R6ka_R!2rpF2Dfmxa_r&bd6 zQM^vom|L3W!C7t6z>1h}#TuQ4d?f4pF7_A@k0tG|Fx?on$ojkmR`?Q#;`p-g$CY{D z-om#bzT!jpfh!3g!WH_?hQhY1gn)jtrG?{(>bqJ{qjKyt_U=+UNA_lvCN4`nMEn!$ z1po1D&7I3~oCB)cHbLz&M@DflQR{CP)bIkkT5Qfjbgbmk%TrnRnxIu(!;HztVj-^( z&s_rl-5e?VK1rFfmu;P^d9i{Wc6InZB3BzR7p##71#8k=8Lqg}sLRg&_)!eD02(W@ ziw_gDEw^AwlU^(KKznoSkoYMm`y0Z7*RI;BBP|(jTYBVni>aSWWiwOKb#^cI7Ha1z+F9w$9TUGTaOqz+;n?j@T4C2`a88 zac8H~WgT86*5Z1C)nJ~K-KF<$&+fTmy9jelqvxSkdlIpM>(#q|!D{6jIZ$VI4)5tmTeomt)aT3>E#^n6&6uy}^UJ_cKzfX$=dA~EGXH24L4(+&46PCOAvXg8yoU>FWroZrQzod6qXcza zlzlxTfW{g1*7GQm?KXz+J-7v9J0{HTj-h;zgZ1KuP+L9`Qe8%&TKth&+>F)>p|IC@ z;pw@BS_77}P;=*^cBTDqcMN7D=g|Eciysno>r)-yluLO|QgSB?>K~ukiy11H(yh72 zep-_Jg7j6}N!e6^&-e-}1BzbQ-4R2wAif0OcKC`9DJ)B7=fBdrag=eQzO7l?QwZh5 z-d&_M;bu+7Vf%Jv({*Y+mbgH3!CaINKUWqZ-oxKw` zmAA6Dv!U8tC|J1HfL}1oRuHwINKn7emc3?3QPVDTQS%3=mnMoWImH6{a1$a#D+(^V zcMhb?+b&qCIa2mYP_aNiw-iHMd?CW11=x!bzp&OQyB7ytqE&;b(= z`N4mxc^}g8W|q55PGm|*)Xc+Htcp_mTI?lu1Gy@v<>22hBfb~wxm*(+PCQWy&k)s< ztzE7KaqMZL=A9SR=O;0U>46E(1uOodU`6H1?lzUE&Au1ZEUm21TP`n zi#T(MMXb;Qt8^O5br5<9^4;+=mFUc^OkDw`**Ay~!o07cy7}6bqJQ1%dFuhFq$|qC z8VOkaGgrwPR-pIRr&n*Ugs8Xg3f7#J*x4m1PpjutUO*4?JLcfV&MryGeITG7zuMPl z-tjWmx=nIg=Z+@<(O595xpwFefe(FTUz`0sTUJT*DrgO`W6_&26XplS@}uikuEJIr z=~qsIRl-nYZ(9fl!}{|=%8`0Xp{=!S3q`Niym%;+SHqHib!_>mjyU>mb`5h~9w{#V z*sQSIrx5~Av}-j5-l_MB;&s{WHE0M9)yN;M!&H}P=!dc}F;R!GSXbM!%balDu09)C zxd!&NbGHTIkUw{;PX3kz&>Lc`b)JlJSRTsyc!;u=!~8VXr=GbM^DL58mz0^eH5Cme z0b{f}WLDUetifFBn;WqBL|M`}F3@&5vDHu2$qx_|la|42VbhI3l^?_dY@8)%8{p!W zD!vAtGbkpg<1byRHULw-&oQf3$Ip^hwuNptGuLPFmXUPt#2S^qp;KqIt5s{EG?&PZ zdP($-aF@4~^Xp)Fc9_o6#A$_&>tz?I9A~J=`=*<1Aslfi)@KXWL+F_PwtO5iVKr|v z4w|qH%*z34Nznr*3 z8bY;Wkt@&-6c$SN0t#UM_;rF6wFM7r5)0(qE{9l$HdwJ9g2J74+5#5#y0}1;^5fL$ zM18SYr)sQV19bJ-Ds??1zE2_XL)gKMX6LM3vQO!d`_Pl#m!er`sN!3lVgWBIKoaX# z?=OTTZM)8@$()Mh;^Xfg`#y_70BRz~jYkv|fa0A3A73QBn2$f~VoL-SZ?QUaQ~jvv zRcsl4yFH{dLl{+qeA$m%v3J;Mt91Iqtq`*LfDqEJ)ag`2S5(1|ApGKaQAPS$csC@cDX~ zVKpGZ8U7fX#1xf;lLEhGo9vs;ZiAHX$|Uuq*^vcqmkPr1hh6QnI?IhM-!AKRGgzh> zY~c>X=C|!~W5#(0@Cn}uyvI2VaJoSae*b0r5vad|uR8uTxGeC#JGJU;{{;-lZ)Qs` z?tfi)q%Mp;iEk$q6d&>;s_c>*7tp+PO<^2`%gMZAc z)7t{-xm(V)dQ7=ivlN_RFr%CFM6Ab-+!fH2-Ll`zEr0Al7q%s%?=&8Kk<{z=rER;l z0G|Hq14o*UgR=-y)p~CKSU`uA$aZIidwN3O(>f^+^02S*Z0b`vGESJ?d&JmQwTZg ziWgwWD{lX&p65P)1{DR+qwY)X^%D5^-84TVpd-KAVDE~$K8i$D@%0)KUhHEl5l3mJq(!v6N<{%u%)o062K z7_*$RG>ej$vYfP(hURA_X{FdE)KBE-F#dp#V09$!NE+iF)P`oz@Z{GK>`qrCNb|!z z1-ln^7i%$8mTINFs^uhKby!$>rau_D3%zn)t8*A}SRUk#kIR`ZFDkCfiG$~0Uu}V0 z0S_uKD>#38=WuT1+Ow!O<;gOX0C{0R?W>q;w1L>#FFNS+Xu?O!i)bwMcZmOg`S1|` zEIau}B=kSqNDYJ}`B(S`N&T-d4$1JZ@C?ZT{m5I+Vb=HyU+UE*t|WBRY*YC^{BGOO(90ON+s!u)lbf24JV^h(NlHp ztA|V@xGMz*Uy8d)Nb8G{Ix~R6w?eupLBMYIn+cH0e5Wm+cB3R~ad!73GneD{w0QA- zJ!L=g`xMbh5VdsUs5K*`9!IA@mCo{h0-=WO*^r}241zb(gi ztWWv@EraCSUW%AM3fPg{r(f){e?o`qQ^e??Fm16ii|CS_qtv;3x`aA!#o^Le6aWjm zP1A`9G=*;LpTjl|Ct5SZYFPCfk1~F=0tnixK1yXr;CcCC5wK%@3(XPG5NHhCWW4G& zYlregM<|(UKzPXt4BF6{gBAz&TBA8Jw7%68u{O{WGuxR$i8ryIpq)c-s}DOHIg>S# z$M}QqzwOZCxt#hLaxH5*UHRS!ECBZ{1e*GKr((o-!Ol$_`1((LG%GPbo!(>~auzG^ z>%iYu0Sz-6zs8g_ol}~sZe>acRyUDpHotLAS!2kCu`x%U)TBF|h6`j+XSs9?{~?=* zDTdP@e)~GW(1WI_Sap7LjY+lj$gm}i_3Qi7pR1ub?=P5P@rylVJtX=Jzpa464e(u( zW5xGzcGtAzGj}49W>K`LHwYzp3HJ?Zo_C~$R8Rb}%?Sx;A$#@sCmeH z4ho2*u#g{xQH+e9l}!Pq4?VE_7X~d6a0s6;{xR*rwgRi+zxbttgn$svY&wP{N^OLK z&+JozAx=F&nEjvmuk!76qyPQh51Cni3`yFujYtdmFGApuMgK)e@RW*q96giRJsvAv7zVXo$dUD9Ykxq*MoM%78@2sJ|E-CRwxULE;$L*W(bhb5YasYkg-A&Rl4$ZxWVm9(rkVV3xpLmL@TrsH|@ zK!0e`<`q(ajNH3dCSr(*oPVbPzfF!)lF@ahVREEYaaNlXUqWH)#|l+sQ?5=D;27}# zsk9!ol@R^IP;qPJF6eGirkxF?bi5sE=SODOepw{p6)D6MYWxS$eF96E0&6#5W+rWJ zrPo9H5az!MyzIS*#{a95eaOsyxxf4ELd65dt12mgMX~ZSN2x}a6SMS1K7UDGE`)4g zhGE{-gciHQOQDwDbX>3f_}trCxQ})x6P=qCY76tidOMTr*Z8n2V!R7MWHA;IZiUIn z%~`A6Xgyf>y$fMZGSU>eSuj;Xd`zT2k)*_x=)}&KSt_IA_b!SU>3tZ+CVeWd;_f~0 zm_3KCPh{@H|JW3RyXZ}WH*kCu0W-blpfFY`#E+BCmp&4;y-^9?To^%Pc8Lw?@;d8G zU9t(eAx$BfDiwcjE^I#p{o{g!Yer6w{obkMMCT%|l1{e(9raIo#ZT{Vrg9%#6uuvM z`|F`q2_`})nnq#=DdzQuIn|?LtWZMiYpBGioAG^H=evxpTlo5eSY6$Prie#X-ba}K zRo-!M9FXy^@-$Of(0PFAYE$xm)h3iWOf$MHLs-`k?o8#h610^j&fHZ^0Ut7zgv^#= zJ>>b=?YR^ZLb@*#othPH8^#ZH);HU`&G+ze{{wa(x|;@>=~9=i$doIR3o3q_+iRVF z6~VrjH(u(S>3-bJZ<#%%cTV>Pn8c-VLKw#@={P9_uUPW|^+DioQIJlayhxnX^diSB z{>vs0^k!0cS|N#=o7f0363g$VI5Y<74qycnlX+<2Q;X{_bXxKgzfLPNOau4JBjZuK zy&-OIc>ae5x&040l8jkX_M&%#v7h~ERR!wfCH#D=?@T3jvaXz&OedE@pClR^u!65O z&m#lRfAl|;1j!1tDMl->+l!~;&I{~-@y9*H7EFtb0ne796m`E5PrPB<1j1K!lUSv$ zt4#l2MYi-}sQrsSisk*zyTAD3`PUv>1TX>r2T?UFJ;;CKT0J)J|Ab5fU&zm?=!S5q zq&Nuw;+P4i3i@AZgP`ftzfuD6^Z!i|MXCY+Z{Hs6r%(TL37g}7WkPTe5P2A>xAZzd zbHUxa2t~O@W(#sEl}(TwV!OqFcN|o+1w8aoEB>%9i4AATbeyB>61CGRw?Ss9S8iE+ zO9Pc#Pz23SwzJ2*iw0r2Tf%reXmhp`qQ~~`fLrI=SzzDW4+jX5p*6tsQC=9_{Stra zJqMdV9b&qLra1Ij@pz1z;=3(imXa2Tu@O$^iGX|WiWQ-IR>C@B>xNbAD>=VGln{39 z$`=D5aFxq5ImOGIr^DhlFGkh5^SFPg3^Uo!JUr70d0Zn(lv}5)B)B6?*ps&%JR(w& zmED&^P&u$mu%T)_@FD6y^UXNoi*Ul9x@ucy+nM$vtUf~C=&IR3c2BgkZ|?w>@ge6* zqj0@776~|iv#;h8bJKBIon+*jn#9j>SL-ch(3|pf;<{WMUd8IR?fPP@(S_sc%_@od zz5Is6x>BhXl|8JhjxD|@Axr73$Lb89ME>y$1WKTmC+7+qQ0LNkAO6=OJAl7Z}p`cx>ki5xw0fedi0_8hu|0z5sUkK@KNG1 zZpcc*nc@obCNx{Ow=P_2q)x2kTON6Y=ZkoUi`EZuPPWC9CE2mj7Jae1#jjV26RV+J zB}+#9N=xoqe;nr)o%u_I)R&!R$P>d@s-)F0zZQ_m9w(d{Mdr*Do3U~N(=NJkh_8+H zpV^k^GbCLKi)=xVI)Nik3HBu;MB$P{{dg60Y&7-gI9^Yq&PL=o^f!t+^ajl>rGq#7 zub5-s@V)&OIqZ$M8U&2Gsx~prlgBeU3?|q&L1Th*#1hfQQ?vA|=Tbiot(-K~@&gdg z@n@jAq;(5s!h!~@1w5C4ZrA1o=eWbeV(i0Iyzzk}S}{Z_{N#q`Y4ydB=>e4tMqSn?5n_5Kder^Z3PD@BXKhR)>LYurjjTM;?H;H|_`8{?}l`;i{ zwcAzpm=#{Eu*k_J0(@CEw5+q8Ei-nC@Zy`fE}E*g&Tt1-2sNp@8JzxZSjs0+!)#?^ zS1Fd~6H!15(WkSe!6>c4VA}jqgMyIQrM*#LfEkMrP0=fOS6o9Fqw{!EfzL-y#ef3t zV-T$tEeT5mxXPp{e|P2F!jbyIRokmookrV0-gK;{+mjnQRJyF@RU;J{kg6Zj^ZoOf z4su6Ks@n*#jR~x#yl5!tM!$aa@_h)YXc7J9=KAWk0=LBn2KB(|3mezceQ|wX4yk(0 zT`2l>w-_hD_(X(TdEuA|bcTrXNlKFv##O3Z$8st;_kz3n*g$5#90qPk2!o4}J&C-} za*iyAXRTS_3v_P=sEVgo|Kviqi0}RpxdGlLzIOz;zldh}6VoE!Av&XZseJ_ju`z5d z@k$RP3BA|j;vD?>m-!|dZ1{&nv04#JJKanbuxL-fb|r`)2h!c0fQc_Oheh>`ewY-T z4~kWWSpljcDIc{Rdo7ojqj$xW|GHzM059!7EzmD7Snpt!;@4WFPH^i_Tqo-u8pHi=rgDf3H0kPAjTTJa}V-!p4_b6gJ zv1Y#j+aWE>DAD&sBu~;2-{z_H(Kr2};CMGTUTmeqLIHos#>f-EL_QuwkVof|xC^U= zxuAqYJUQ$LUNLLrr7YRmVl!T@_;oCA>lC1qT`&trtn{as&}UU9NkS!wrg@$Mw+_?G zyl43Jpro&Pr{tRs!$O=66iT$VD;yyNaJ0nRvAc^YeCH*4uoVIDJ^;I@QQ`@1vkqYs zJDPj|!xDA8*&>I^(ftU44=jP)$ONJn`9k!s?@WJ2vzsZwj*C7ajs5GjZ^#{nV|74k z7t75>4%_GPO?8fS>bUzlT|b=I@bacyMeYljEYS{(woB*2@M$2_CEx7y;MI$%bDk<&BcBZX@BHOVpv(Iac=iO4?z{0wb?4}eT_X& zQ3LDR0d0mt%yt4?cYWZ=bcK)*UT%eYm$fdtFAvZ= z#z`KsM$~be8F->P9H&)<_hS=u#%o00wvG~UFROXMt994-2^2E$N(e96i%ysc?F%G( zO#V*N6&IzS$WAXZ^n;@hpC`z%?dh-A}jncMboaz))Y?+rG<%K%i);!^k{kh#pob% z)d$aAGncDw1~GZn&PjVKEWVrO#%ygMFk&^_$Tf|{aTim4>y2Ya<3%WZ^Tii^xjUdR z++~H*j$%o&hDHsLKZlz z`hHpbP9BaQW>!R`Og9S$Mc$3P;)gL;W!+K;fl#N+F_$(6h0yc@{4JCGIVt)sIw-tG zA|SkPahP)WDE%6biQ2gGV!D+q z*oe(4b_AnLOOmt~=OrR?V!E1laIYo9(~j7~I51EXO3t%FJTKb;Zy!GygCaqMltZfo z;)d7z1obi?49bBSc)Kuzu)03<#Z8;VUe2YDw0`$yDylL4DR)&@hcX~_SYo>NO;lsI z2&dKT^sy%5i?lLc2gmBb=p=*9q_K}8JghYX!i8;pe$K=%d-d_Y)ls*KxaYWT2{UUZ z9vT)ZffH<-AzK~ygLT=yf*SQrk|Lvf^P6mQHLI5b%P+=QVai$+{6JQ-cA%i+AyU1^ z^LD!YSugND@BQNYXL{g`;N0AI!0j~?+k1zN{SOM|atTaz*(3LT zg_4WGZUo%RoG2;5FtrGazot+_UJ%pj?^(v1HU3lA!F1vPNe4lH3*uW;(o(U_k1!;1 zl5i9B9$7F)>mH9|7+3%3F0AkkMGEPEf2o7PDy!UI7m<&c8gxdTiLMQUk}0bNgA5$e zzVyb_!un8lUMgQ&)I5igREPHh$iksF^wQPQrI*^q)?v9+T(ISPvum6wP7aMwzCsI! zidZtM8#d#h?Mqa>O&>2BRx<bzSYLUMk@+ zT>FmVjFqq3W0`@v3g*NncD_lH0N{wQBi&C`W|JC+y~+~L`>bGQHe{}C?C}0;^#KK#HOAbV(*(MoiNoa+RwIu>?4$LP{8(+s1tiD z4c`D3_{+_Mk^waFAptj68mDTX$R8E@=gJcKFXm<6 zU$v!b^%jdgUHB3%J@(v*tAJhpmgS6xW7Q`Ja=lTv9^LXFuaUvWW5dm=O>Xe9d8qbM zhG8SojgU5}bgGkH#DceQTU=};UTOi7%92V!Del${j~*qxWRYFbJV7LpC3aPUr2AI# z3c4%L!O){bx_pVxU4@URf6#uiZo$ zf742bwo2GRd2(R-!YUf=lxt4UW7cgLVu`e%y`|l_hS3J5bhF@WDOy`-FnrU-t@XE7Kw`>Y<2ZD_B4@E8@0_he;gl8@ha;v1FSs$kba}z zq=-{8?GGLo?_Lp4v%uLtf>428OD{$7b6=DSue`y^2vX0qraBAcBYqm!i~>Aj9BI31 z1aF$Ke6>anly&?s(VkbPnP#=Ah)7yfy8sgK24$U&*Lvl-D{ILSk@1=vq}{KbpFYk% znqA*%cy3*7rf9fPI}Z1dY)4$LONm?_0L6uwXCm41N)Bfx13;GI#VffW&-SDO@$VY| zYA!1x3;pHVkvvn@)Wx#`mNjNz?m^swP-)B?LSF|h)^R`sl99}lb2X{L9!g@A2>q{1 zNGfy<&SE1)Hu@yVVo^ZiC%GLMu-~T49c-XRiQS$|x16CQR_6I7#su$#`3;}$jXX;7 z_6k99=lgBc3J`tn#6kA>n;7ARfao6<-fzI*Qkn*3_5HM)i z0Kh}No8c>SXv80gcDq!KUMkC;a=*u?6?@st_xi>B;uCm~Cb|p5lkV(R`#u$?3p8k6I0&hZK z#++F$fW}Ry_zvyAey62Ip#J}u0V5$axPO@eS4qeJ;|=zu`~RgDNwG2sG5&@ddsqkv z!T*Jv6m{6tH8}#{mO7>wx{!o8Hv%ICJf13Yvps0|DFzq4GYWuQIIK{mGG&d7H08LR zFKPT6`Hwg=Lz?;t+^bTCl?osrNRlpf^JXLaW=UwL;^pl2Rs_OO=5xYlTVnF60 zC@u{ER*Rz(svgdY@FZe0R2$Mfv`5YdtxCi5q=Bjqw26Skcx$c(VB&~+m>ZnV{^66Z z%x@D0QWJr&LX^-iXNHXMz60plo?(;~coFJ)b1YfAo3I@xaDCSLgBf~U{uj^*kMGe3 znaBrxEOYpP8fx-<=K6$HLt)A=TILJi5cJ6A_6{o?!G?Wz4={s4`8C3YK|80;&$pDL z2h)(EHb-D5wRp_d$?^X0Qo7F15(awVlYZ2DPN@xBP&b-Zw*Xk1!Gow4EZ{;2?WkIzpZ8n*;#{l&cJYO+>2=YlST` z68Sf5`gmpfxGH4aZvZ|L|LMK#m<1E-BMGvwfje+p@I@;qEg6WDx-X1!oesb(OY(*e4QyK@$;1ky&c)duopXrTB?emZ(FyDI95yD?XmGfib$+FwfImGRwUvR9wX4ws^~kt zmQLx5H&X3K&&rW$-AC^AN^vG9#>d}*c>}7q$<8k+YX?3)_QN2#W@vLNP^Hh&4B1lu zcI4#zh0QDguZ3P;=*72#UnTk$w(AL_)|PNO2EI`Bd2&T-TI-HlQrqg&Fo5~>V;ohv zbgZvP2pYQ}N|_z;8Obhw(c1p&W$;+x0;w#+w~r=Iiaguj5vds4u~$V$PqGx^x!sh! zPn};-%=LbU5L2mWEW+NHx%1z{{XZb%S2}@2`G=ur(+(Z^ry5w!6cK@>`6tZ7WSZ*2 zkp8U%3|#(K2{;6|g>=NMMBr!+d>>T^NNO)97v>Zr-3%Lkw9}=zfWBu`*_{T`kvOEcM1}Ku7hOMbfg#08@;;pOQw?SFNM>;JU0y5m^NKZ zzBRGClR0X#pm)pLf#a8By6U_IgV+Gdu{|Waj#^ z_B70$`Xwjmd&qRoZ*q23&L{RmPx()xrdbBJLKE@FjhjdV&L*10zob|e`JLg_cK*bV zPHfy;?~MXX0KyFmJ?_z?;WH@=X~MZfopX;cRwh$wc|^!N^%)L#PIufTtMfiWG+|ZP z_maMDFxk+cjg1H3Vavv;vuGsL94$|u)+NmpDA&>tzm-6W zIxSGsz=Mzr>%&q&Z)?LaXB2Cx)k|N3NZ>ldAb8uBK%JGB`S@@b6xXxByLCfdYpId- zfk1MQj9b8=0r6vI3A{_Cik*SPw@yg<8krjFQp&H zgVN)&4;2Bqc0W=Vp@UguiB~0ftmMp_=VoTKs5X&)AHaLD#zXxY8pV30o0 zF!y;r7YO zGD#Pa8$r~&?SOF%^Go0ic5ELhU47mF?Nug&_rBf}S!MT-J#`b5N%*$*YV~XLL6|t8 ztB=NqFpd72%LDU)um7M6xqMD&!O9uY&S!dXAW@a&O+F0yK17-mR1W%W8}8EJAub-9 zP)s5a1A#lqZx5rGT)QXzXosyk8B=c(admVm9Nsp!h_kfDkQA2VI^mqj#z(q?T}{AxytFs20MM3OiBk1Pi_Kp0)1nx<2QsSK&44pM9Eh$V3_smqSOr?_bC@XO@aR!eo8_^^}vYod+htDYF*jNn zS<4s+G#7ckizI~%np4ECRdbtU6)?FLulezrz0QyzacNqw`K^-la|Upf#Rk*gRO%Vi zTwuQ6<1t?C)$iA5K~X~rS~haG

A^0&B0BzmA_&mq}Ra3c{mnOD6BY-*DyVRT#kb zp3o(QiK8E@T3Y~rTR97%>Y}{!YGrq_s7CQ zEV5E~Z@>to#>M+(zS+Z5fxh9x#DE6P`El2V!;cljj3Z@|9eTISkd>YXg6K5{-N?0{ z_g{L%1|fFO!MPxy2iMK00m!c=q~qXvyC*c)=hxlmXdlM&0sjDBmZiSA$kgrry-2jX zUvT)pCAI^yDqdV4Fo4mC4C^5p+W~R8O8`>G1PyWX@|;efSRXnqFFeZ?vLCF%TtYRRVjz#HER@ zJcICkoB@@N4kC2>7cIpRqF%%7Xk!+3xuaZ^2_)Hgi?nFxM?eILL#FJEE4fjuq}2B0 z<)d;@m92fX-&rK`=hjS|hwE+}s~W8U9*u;2WrA`{0y<?Q`wPnbSbXJMgvdIK@q7yB@O4+A zYm44v^dB%A@xV&M8k9>p@JdTZU=#F@h!`n?;6 zX;gIz)Ic7YT^=eDjeDg4g%M#?@M;-238S`^BfC7wkEDR*o)~)%{G#>uZbK=j&OPP3 z?ug2r2DS)z4n~_p+X1I0y-EK;w~j~SD;6!P!w;m%g^<>?7&jjJ8pGGnz3NxuuMLN2zIl^+Y|p|vRzUZPB3iv!QmuCcT&?%)#O~%S_W>sK z6u2J(dEW(`3`EorA93H)gFng-Z+%0?nFVl~w}-RYM6WTQMApa|u(JxZ;PiXg2M(*1iM}pn z9d=9bP!6~C)Me2a97I*-wBk>uhY?!^jp0Dzg4ah6pbx^D;e~z6~O4 z`ccgUB%~)+s%zj1>YDE|hHU~i+l*(R#0s?&r4uOVAZ%SBn zWPlQEcL;4T{hISTTwKq_qXvErGP=5g@W+V3ikb;*H-5ezXqm&04_KvM{ zHY#&fMF}8yuL%(*XMEF;x%y}PCa|I+n?pC?QGAd&`XpQ{SW7_wbpK{0 zak(DUQ^*&WG2~&rOwofa&W=EqWF7Ty$ePo|?k;0~c*HGt$llib#NUxM{YWB+>I`A` zWMNmta_CFaJ?;``Ma$B2B$n0%@hXv)=Zeh;hy@K#QFHBhBc+QCFm$uUFo6el%T|ps z?Q5rXLHOa z?B$x^3XDZlnKP#PDS~V7UqI~zW0BFQ&;mTYgqS}`u#iLfbC;qa{1M%=PWPJ5_uO9y zg(`+e>ZNbXUDLI(GxS#iZF@-x*+Cyv+IG$AsrZi54Nh^^^U{*+^EUuZrPw8pbG#Y+ z6uaCXt&WrUo)LkWn9w0exht{UTT`NRIW9OxX8rIr6oyNFklnaVUO*PrwTuy_^2*%N zu+wi+bcWwbE-hmSD;K`ESRO=}r$w;KuNW(zM7g#98zTF0u-9wn&T z+|jGoI`e+-d5LSRUa`KLM&>5X0cWVydBYljpY(gnje}GkRKPV=_=2%^9d=x2Zu#0e zipFEdDEgt=V9C<8c3^jp`LC3AbOYRjZxy`tDZY?WnZrnSWOPv9hOK?R#_}g3+beXm z2mn|nfsa>_8|E8ab#^2(wjln#ch9^QT6UFcBMu7~w#6aAkl48&CcX*)@dG%w&*7tV z}+o7ALIzX!u+E5#nIT11P=1>Zs zv+NLZs7g>+Iv|odSM(35HoQ({t6wHc|~Mm zdjWK>Q3kNTRjalwVKl{%<%yVso|k-Fv{_fx_;w`3Z$>-rcZfItK>jyzVcQrc(S!NR zeODo8ii$w~kNk$oe9?zNN%gS80y=46>R^5SOw~zVOE+lVQ7Ji+18rWb-zHg$*Q74d zTCCaXqTr5FldF`ov+vN>=JNczm*eA8zqw3NMz*pD-_qABmGimtpXhyutVM@Mo^{7)Tg+#{AqZm}Y5dr(M+`t36gr9ea z2|N~fv^j17N_XG%qck*Tyr%=sS=qX^9^d>U6w=MRa$J`Qy$AE!=w}UgS%v7k119!G z*F1?AGvZmdyq6-($kO?wdP_dV1R@EI$$2KmDl3>!5jW-B`Qiv`>te9C7!LYteMfMbMkn%vZ8(|_k^}B?!b(sQxp-jZ z4Hq0X@LQ!)t^~Kf{1!yqq@ud)*|Gevl7Kxge3@{N$?|)wr)#}meKs_(l z1|6R>al*5;&Lzf@aRbxH@HVTFXXLQMR5cr6#9)M?=+@(Guod?|u{D@779+oXH?!hp zQU9eUbP{GH?+?_U+A1bf0gb!K4O$r;@aTikIQHu_+&~W`pSG zq_1umlP|AgMeMyL%0L_{dtm_h7qxMAwfGsk3oph+oY#NvxbQ4q$Vl zZZb)HrVGu1GGJa6Nt@jU1Jk9EgnbBkV*ncnzxMLPB&Bz~d8MIVp3!Lc*;l4nGEuD; zL&j5&Pw4mk>%bi=_}Qvh*HN+QHXx;Wtg%_$PyUu{r7LtQS@-DGow>mn|HoL3j*Gm% z>&Mp<*kn}Fwc1jUt$@)H=abTspYA$g0{;cs^{O9r6L9g3d+iD%aa&^JxlL347-7r9 zm(0y^ACz|dAihX~uxcNwkd~9>xoBrv-`c2-Ri4jCk3N57=Mf@8HyV?j+DcJu)a94a z=ik1}+y9(zPyXqY0fYQClq+hl={<6Wl6#gmocc;kBM1I)7+a8-KrE;at^53;3G(xX zn(ax75HP|TRXUtS(Sk>)jyzsECl|c4WO`8GZwrF@Y0*rWp3XlJwX}_Mqf4k}RBiC2 zSi(ls38y2KO`xUcS6I7E+=t{NlUM>IB|-ZlL|o@=*bS5TJv$GQ6s3k_^IL?^V(_|j z3GNkX?U8RnK~-r6nI10RZP|_pLh}LS*UTjqTVPHI%R(UO46=mKOA+g~I2^M!>w83o zr6nR`8Ip#sm;ZA^iX3CMtP#7M^dyZA1$L@&=68w`Sqi_#mF zo;okjQL@3D|4s6u12>SXeJA}o(6?#S8*Ir3ezP-F@(}rrmM2yc4hj#&f&GB`uQzu8 z-Q8agfPnbQ^p{ckdkHbK^%#;Q)zk_z)zkqCh>zf_dE$0z#pBZA7iK9JB>G8Al_wWT zO%+pALf4E7j)u)pD_|p#3XQwlZO<8^!M<(9xZS=6W!J`W1d2*wK>NC^&Z$km=DGfF zo7eaECG$K5x-?iq1-I{IPK(@%oPNs&*8kb7K)doshQN`2_~YmoQWddG^%rI?jvr+%jU;%^bIWc8m*@G02a<=J6ICAc#9>(I__hknhS}jokGx zp9o3YT1aywO|u9l?NwfO!)(}^4itk8L>5OL5}cp&HyOy4%HjxxD@Hbd=K~I`;}kTf zS2!NcA?5pGcyoZ(gEM*W)ZxCSbo5V0XC76&jyLQNGFrX2Vj{oC#p&>oN9YLH&D!`K zdno>V`1uO+PGPae`U(hT;rQwBAe*~(fA)mjHxy`Mbp;E{!++;cdm{$t?6a?g+OrYd z!F#XWGZWk)aEA)Fdoj>WYu2C42FEgc)JTsSzion~;hwRyMmaCTPcHd532H+63}YbtuH8bNUl@ z_gK6kX*d9kV)(f3v-fDZucNAy>8_K*yRF*{kN_Hfr;09gq975X7->!5vZ(H|;``cD z7+6TG&N-F8aFrSsA==iRewb|p2``AG1R`YqMnjq zK|+Xg(C)@|woy340T@9#h;hAwB1|1-NLelYGy@MpHA``xmzL%PJjj4C0NOnKzU#M0 z7gPm9d3FXRGiLn+rP#t@=D>#2y(=V?N(>_uDHMlFG9qNps=+LpKDkNp)InWMv{kN4 z>9n7>^Zw%YoH*p46NtHbc$G*E4eH zUjs{SXnzhBm2-Y-Raw9h+U#a1Ug!#%J!*|scnpuX{bB>@Q$9=^_<2=MM**YW`%k8! zWfS$%P`9T<#gW}zywGElP`VL(d%0^+nqc(oVt=R9^{r-iXwKa!IsR?Qxil}ni$4?A z${Qz*%29lgDv-BY-+Y$XB*XhUB#UGLc`+w22rb-kra^mJPaPu$Ce+avstQeKl4{Je z*{+Pbu5q@V2^Cw=W4A%V7~7t+6~T+4CMUSrcCDMRGZ#%%*2% zjQFnUhBjP+IpYXvYuZi6TkR`tt`e%F)n0RFpx~X?4aC(OarV`X~Rk%6t1a_60gPoqyB}lbe~8quqdd#&HN%ICI{12KN07o?aIiBkcWU^ zL%+$tmJDl(QN|~6EFcwUl-h1D;+ z#wnIq{4M?{Xkx-4MIY`_OlonpFJIeQwR4yU^)151l!)iqxp_5{ELk|YFMrdc(!zl4 z3sdbOlYJ{u0YtQ%MKFLaIt;@2Yaga(xH?ZBmv;?Cb`=U`xtdJ%04!>UlGZ-U__Xi2 zOKCtlzj4Zp3v#aJc#-W1`Kb5eT^S13N*;T9+b#~I&p@2OQw_!UNUE#Ks5p)NQUZXzw3>hKbjGDpRMEGNY zG6&IBwF|iUF_>d71|e(!j@qmV{_u9HuF$|pdOtc4?hCyuX@FF41DFRZ0IR=h;CEag z5~+qMoY@_OVC8^>Xp^V>F$8+ z@8W}`fC!b;_#v4_li)QyH5XRjaOndwl>J0m{&zOvs(lqaL5Y%ArqFK`!Dk0BFu>8r z-LMX<*82gZR}Yvz`CkLL_tY>Z(wpa*ald1H-nbfGQEu-kdZ>!4U8$08l^-NpzEZkHs_5@I)R+sBEhLNj7J)kiNEyaYDWS5d7#SpT^1KpzIpOj} z6xeL8avE4wx(r3~I?3hoOIV=fog_-#eeoX_bDi^wBb)03VcHsoV*Tck{p=nP~=riC!%)()%tN4jt8WaH>Ogtp1=Oj8#A|h|#Lb zFxgtJ1tSCFUO&~hF6$ea@zk&b^I=;_6p}j4B=uO_jcXRWSS1C)#;MXAmY}X6`YIGQ zesoB8MWXSMGp1Qn%GASgjOJOGvhdNOSMj%)Ib8$ zF!A!CS1y#AVU{@hrM*vj_RQ*P_66gLA>Z@BtJLrhiLF#pynf<;&#%YhOyOHb*{M#c zpZ2G;(}_;3w3{3kzX5_uK2ZO)aIbW-x#Pfy^H65iRRFY3V^#cfS~p@pWG8kE4?D(-TW%{E&m0J}=U zTqEUlL%^pcmQrB@veo_|@A8NQic+Vhf_`i379Y3L>B=Rt(6!pCb}c93d@OXjfmi>f zX4$FN@b_fotXywB{QSfwrz7&8B~_ijM!l2*snj~Re8#sV3qZM?RYU{sO4#bOd-Q<0 zpoLvoUxge^T4G>)jXI1t0S`DF-+i3?v82z7lz;e@VXRMcvg_dbGSL|`Lz={jWB5Yg z(NebHyRu0J{7O$+ZHHE=oq;T8w%X_qOLbB?-;DN|o`%G}7?RwlRo-8pCr{~&eXQ=0 z$Ib*-u4|@qbR2)PS@a|=jHHAY8=V>$A5;o_5^<}J$ZiEDcs0xc(nr6u=%7Fo{QS_) z`52nVDi;U=wdW)=x#70WkIJGI@d;h+Q)jzVwPl<+T-!UoaJ4tXQ5XABFg_87hbyvX z#wmrMy!(FM#prsJ`cz3}IfcFk8xf=>UTI6f>IjyySQNQ4y^g^%y^T%&? z!8kdSj^ zx%BYLc~)&fw(&b>Rz`N$-5m?U4U@9FdAr4IA?Ncwj@gZwPIOfLFFt4uVIJOZv|Ojg zVvteD+=H=Enrtz9e*Wh6&D zIUaoIIh1l!XE=@Ac=y=RSc6$|Oog~7Ma)nQk=7tZ@u&BjeT-kbLf`%I4$XH-9DYZw++aJg&FsJ>E^fPg+IMUWRqSQ?Qaz>M zRQ4No5HuhkHT`J#wO8CrKv0 zCQ0;f(m23{tRZ+3o}d6H){YV!Ux~eBuOmkUEJp=%N>_{(dE#07mYQWv6vokIV2je zBWP74T$V^x-||B_&90&(DdG3Zn(QQx{U87Qe$wP0NQD)&8bCvSC69t*(@0fSz@)MV z()t~$c7B0(&-L(L_g1A%iXq-_!b=v@rw1oei-E|b@K9b6%;pAKptEcfSQ`b#zp-L?ib>yQu z^MK$E)DPPt{9%ihTafG#@`)q)#m_K-`h#?B?Yw!Sn7Ev|v6GebyQtL5id(2K%hP1# zwnNc9BZZ+0Nvd9?G36yG#YyuGh5W~0`+AjxsqRg8v zmu#)E;JVgkmFEaAQd%QIDFd?|QQK#*nU~Wq#xyC*c(!3I-qmktGUD0Pr~9m*wgn-|TG!Du=Al_r;Ri zh@?&2O(hOegBvVgpCVn$v1x#m+YP)YU-@9)SA+KO@B9Hb>7}j$%m>Ah7zxzP>6=W^ z+U(YHmKEsukxx|FMzR_F!7B5AI>pBy1Xrkk7_XBp#AJkNc59?)2G68fii}f@LRO|8 zXyF@!Ls^j=DO`UCOJAq}dEXCU0)+5M>alNN(Jdi;27kJ#=GGKY6s^mwbx{4ipc0rw z!R(>*nr1Xl3?~u@J3iS;v91i;>K1httU(fWlxuQJAdqhW5R35H2R!Rrc70{G`4I5cx$|do zp17#!>k2L#3ya*+1)N*DE2MhLRe2yP$$iYgQpYiUcMhJ8dv@ z@I&RpO-%0_gIswjne7b5?(zEf(5CXSm{LqEn)xWkuRGJZ>+WU)&>2p&oEW6lZHV9M zxD($n&A(_C*-Y$mM)M0&KosRtoIfyeE#$2(bn+pU41zBfNp91{1)O!s)=g_FMZMB5 z8VkC|9@~*#-bv=J-U2Kb+Ne*H_tNUZne`8NZ5=NtE5h9cNs4Eo=-*~p6j)_h{J|9irZ7eM?c z+J_k@hqd^fzwk}HI|L?cjLZM~-;UU#ng0^Dh9%LU|D-Wqmjh&XzY84vKT_}RfJqw( zE5ZM*;wH|WG;bjS0TIxB*8mt(clE&_0rbW;hR)8*nov5pkBJR~Ql{rboYnMM`ZmK=4dkNZ+J&-DB3F--pI%I*_vKBE_pgt{Vf}a4Zfp57)R1 z(!RCX?pY5$$(xnLm2RUtDv84B^*f@s2*S|tgGOiUn{KDq3AEN6aP{^Ebo-s$nf&1Y zsoxh3qDN>n2j+!!&Z}`YB-@rQuC3!vWQVrr+|?KnZfC5tOl*fyAL7;}Jrvot(~n@C z{x;0BH4bquQiV}3I3$LY>Ar`YQlDqB%Ycxe|Udqr`&Mwx+_iyuGTY#8v#@7pU9T(LFF1VhV|_KRR!T zSO+!IePtn%Y$}OoCT(~SRC$v94n z3|&Ae7ykURUa{=5NdUlSQtVm>L(Xk>1f_awHWOnxQZ$4(bO~&iE zn?FrL^J&PZWP$_!hwYxCH!;6{KWH7XLLBk&x>j+QU~)E%n5gDn8?cYEhLNu%u#;Fk z!(gH)peU3^iRL7e#;bAYo13Mdmr@pEV7O4d!IN!@U(0=306fpwSOQJZ6=VwMP1KPY z<-jl5L8W1S<_5_kua%2&BR$Q|cqVeq9$T$TT3Aw%Vw-0OLtyl&#=nr!RgR%w5G2bS;*LE#rl9su6 za!@2!vNY%YVC(jIGju-Crn%JZUboIJuwpH-zN*;10?<&?Q^)VJx8dzYg*(|sR_@6? z?L6977b?QC4>w=KQ!{H9@=A&D4va!acUvtHuT+hRM5pZu+QCb@W5!CjgXu{Ymj&I7 zR)W3f1+DBZBl|*vgLbJ6ARM@=5M69lknf4RXB|99y(qq8CI|X2bQc2`_9p$sWmrS? z0rAxt0LbrBSn3e%mEL+n;Rm6pph~`2cKJs@K-mw?O1x-x2@Yhsi<)EEm2g`Yd_tbY z?GKmi55G5Ymb*KqUHmbk!-uBIP@5v&5@PVjo43%2C(#2Tmm2tQbq=l?8 zFuPaHN*Z1+kyu9%tMK|04ujr-?1k7nJjuif9^jh0(mEktk(%UQd#hAVu?OU<*SA61 zh1csBhoM|n6zfhhAt-ba>CStgM!K6y`rgprGbV2mQEFH@WTe<&WC!(7RWyxudtl#J}~ic39SL!XGFueJfg8?KjrR}l(A z6EIg?ieViZv_t5%k7v{I6RVw#1*(UgtK{A~7Rh%g%XJhPsyBQm;daA4)z3}Z9rh;P z0rDpKmd~zdP!{B-_&K~ueJNy6DiyaT!gbgyIZV%)vgnUvL$U*=IH@~yPRcGS5BY9c z(DQ*K6oJ7N{tybJzor=!6oJvK4}6X|Cjb!BN*e$mr_e@trZRhL#0#s)#?3&0c?k=m zxa&mbi`Xe_5Fb!}Ndt}TlY}BL|C8HC0VM*W^e2O=Txp>6mQZMP;}&H{2Dc%IlXtr2 z0e13^cAIwZ`3?d(YT$!Z@#qhewE`2!!ib}Ys26j!6JvV3@>#c3fcdT_RIahOJ0M5% z0}(*F2lHYKeoBG-nI!hY^4=cAz%0k$_`uB3OISE~a2pg+C4d7es7 z7|G_)H3*KS4Vu2>afwaA1}|$Q5+JV)oL+8TLxwoe-q<~VX3O~zUZ|#S)G%Y|pyydH zw=7Y0pAMc%Hda8zS9|-M8~mGlEAbf4vI=^u;ddVHLUgT&*O)rV2u@Wt)bu51Q(;r90`dFdqWw4|zgi~`zQ8Gw3?i3aY( zXWP`y1g?{Q?f~gzX@JK{$Ck(&|b&;kt zD?SXB=2ByEoq)K;aitFp51{ztjQO7${Ldpp% zkvKy%o}EpXw2z8nFi=fZ@-$IDXyR$KGG;HRf;ElWw4}WX%c6De2PoenGhuYit#HYZ zet$~yePle5b&47(f_hCd=&n=PrnKWKJI}xNY`^(~M({w#{(`;|1Q$Hr72nvJVu57q z++i0oVK!HmWhk36tQI!Hy$tC^NoJbRr@0Y#AK&c!s+YF0q$7M*TV5Yb$*E<?M3IcL=1&)@US4E%;0{ zDnp7IV|93q?h$dgmd+`uPi%AJdbbSh@g1z9p0(VA+ z{6T2GXD}URMYI;*lj3Oc3`#~AQ!q!AT=~(N@ zd*VD8lk3;g65BFsCVz9?pb0V^A!y@o&hpur$Fxw_RMl2JwH=M_M2jgR<@3h-KsQn= zSOTjGtD1^hOMq$6%kN16qroZC3C~9kBK!5`>YA2}Mq6uD8iXv_>F8><sftDZplWdyV*oxTumlu~w>p{ULKDk%M(hKyu1aOZg*uIVn z!{mGp<~|wCO4xt5**lVqLZ@-m!>D`=55|r3T_2b9r2wj*17&CR?qifF=6@oRu#y(> z({&yPsoS9Jz(1jUp#BU3J?55b!rOp`kF@q*E&2MZp)15LZAga#-vED7+_y4}9gR}@ z;el#-opBys;)iU892ji);9hoGb{b;_<;-p@((d;Qp@al;@r5cF!_uyrv#HpyQ@bK-z>#n1Gbhorp@ zZGieaGq!0r$0nbZ&NVJCFZ0{Q>J$9$?n}%)C@q&+-yc&eUcxS@Nzj)FApKz`PIbT9 zYE2R@{ZNlv*B{-5j1nJC*gkhmm5zOFXmDn|l23oHAMH3IF{jn`VcbmdGkbDAR^jpFBR5fBA*zLSUWBF)Z zd~vHRbK76F#HtujE6{0v#0!j+9wN`0A#VkvL4qzA@@cuYkCoMrT}YgxTKA4O@&e;P zs0#p;(oR(Iys07fSd1zk-%5sDfsTPXIS>$Ghik>M{A9Lc6eTaZfM)A#*9kU|i|XQs zD&N56T>E?21&6|Bm;NbAu&g@866h6vXXg^+oHxHX_2Bir*ClvVi8H*Paa?ZCq&v6o!cEWEeIg(# zP76GSsf^Tn+piIfQ?4Uc5J~Vb?h{(>Mh>5YGGgV0xgN+q>E85iCsea!jPpHxKL-4o$dIl&0YCpd3_PeacPkI==okNaggvRq%6^5C13k&I$(BoCVZ~x z9;=}^aG{(sLB>U7{w1o7dUF8rq{BWMVjX}elmuOf@*WRb5ub%k+x)8>WiqM_Y=GtHBNfOKpYvt9vcv=vzT@mjY8;wo5M&XD1{y;?&G-7g4tv|$J9R%law4F zL^WDA$^@dNFvVJvW~fs?<(HRj%Omn!bc)0S&(08RoE{#5^@5fpdr;`$2eNYJnBfNI z?rsuvtJi^kiLVX<4jwT)LwxMOVWwgwQ}&2&k1AERnr z>>;re-ZpF_23F6QMfb81c2EV`p8bF*J30K0c3N7P6@zuv6F`^!-38*DTvKQmO#%P( z9>WPmBgAZX7?Vi3OcsmrM%{&jguErCi|yy0K&>5=8jUE%&!6LC3H5=_X{l<=sv3m% zN~k(0e@>lub(Q_GF#yQ@gFEgrEm)U%T#q&2OZn)n=z$yxhiiZieVKDqLR3Qrh!W%PaXn% zWVUAyf5`Bij*4wwkHULX`u)5$y!IjWTf9?*asziD9})GY^&_;a?jOB_3f&CuO!rnA zDi!h88sa6shBIJ-2g}=KA%<@}uI6dW97iHYK_W<*|8h$mak;ZN1e8 zo=LrP1p$3S_a~t;zNGu3#Q4Z}D8Pt>Zf6}SR}z&W`B)$UyjQ6%t~zV;+x!lu@+W;I z+tDCGTV6{(Ry@_`BzL|nnG53Upi93@Tn1HKofTwHSe0<)%1Jy&7?sEIRn-CFO;vu8 z9f|4lFB0S|3BUKz#h42-HQ6kcC@P1SnOJ)2b)xG&teDHA)4R_Mfd|+dk|h!B6&ugX z%&at>8n%B?nwKoxT_+hfNCB)w)Teb)bk1l|YSV|K=cnpYVh}3Uk~&zbA8*TS@eILB z)2|47{3O7BiP~s9Mp;&DQRxCUvT(L6xd2a8h#f33J!RJF8{j_1R=?*c5%mHt_x)Uu zoMolt(=R})VrFQDHZs7$+--g|2T#v`Xn?5D>6{CkWd9v#8 z%Pc^v+>Vnj4u$eScdJshSCC{Xhtswww>~LzrMiQ`MygyH$@%@1Y2^g4kL70X>!c7k zn>;hKQo)hIqB3f_@ElQ@Mm4SUGiAX!U#yuZZpaYqf4**1?jTB^G-KP^jN53Io(#cR z33=4vmh~_^-lDv>qSJG{WEhH#5p3eouZ*pBc)H#!B%5w2u_gA+VW4)=0~q3IR?vr5 zXRqqPYqBAj&o)_wP>}%=lCXxGX=N}TTE}b;CF#I}iPll|I@xuY$M>9SW-)K%w#L@> z^n_&TZJEWCmcJP}Yac%7qXuCXg(N-K9d*u>Qc)={0$kc9^nG~EDJ%u)na$Rmy{|w> zi7^V%KRWGfUwA4gS&Q8~`}uTn6zouG5e267h6@`XYmO$Y%aO?%$1IgTtHe|)+i zxNi!S9M#c;@#`k4646iOX%4z*bB4+_9NAEj)MjPNJJ<8AvK3n$Gm3&&ic4HbCY&NH zjbb8xzf!4RXJi6K$vU;e(|0!MW)Uho3nIo=L-Z%wP*cZG#bTqTPm||IP5j0Q6RT<6 zsX|QNp<^dr*zi&Aw6S^zfw6i=$*}-^z#ZI!%%5TWilQ{+8eZ&`*C(`v*fa;TFEGox z-*md~93EV(vMJ@P8+Xa#qGDh0L;Ii8IZbC}EQmIF&m)nB*{H=A3>ND2)Qx$6y8UX!97OZ#=F%g4V^yE0lWHoZH;QO*sXz8I?cb__Gb=l zGmqQM6)}LK1iT7ECD;OxZ%TrbfBLlL-s7(cG8-KAIyisk#q7Oj*}xhsRKgkV81!Yj zz-@5mnjtwXin}zK+WjB6(-uq_F7ffPNE@S&e=*1ZVoI7PSaAzkBxwZ@!q89P_D|pG z@&BEq?Jb!JAQ?^&gIf6HK_adc=@%9?^{u6;s2>IxFNlUJv$3psKVd{Py5=shRIw}o zTqk6%OF}hFMhxn^AuGiyRy%2nm5X#Y&YYeuo8LZ>KW&~bWhxD9sb9litrx9%67l5D zsF%%SC2t@-0i|p{Lz}XAt~N3UR5UcXL$K+VtGKDDGrVfVRTL`$KwZynt(CRJ$0Ul#DqAW@PY{HWIuB9juLaJUjkLcZHYz zG`X@iuV-GEBGxIF+~F%VDYuO}I0I3+BRm|+F4N~R%hoy<26qayJ8Gm8Ae;9L8LguM zhOqOt^n5#P-|X`IO^zb#%^L9`JMhIuNQ%-II_PJ9Rf7o4fnEZaXo1@Z?}-wtw2T2{ zt|!+^Le?LXI1H!Y`~M`z^$!hC4UY}SbSCg08Uir9i1(qnxBoyP#>8muSw&uGIil~; z3v2L3Xy&u(geR)IXS5QYmr z@!JVPgMydfM@kTJGep8LbWS}Zb@l6MMtD#I(8TNGs3S^WDbW=#RJ+9I33u~0X1fU!=C6X7>|MK`uZp9+WWMf_l+>ujN5A-@2mlG3 z3tPbl9zSL{{eL0-sgWZ9j6x$7Vnoi#7ht&(GZT+g^;TX&k&@+yRS||UR`n`Ezd2@+ z4Ui%paB79~1^!JLu|6suWfJiAD6AtT!V8@JV)dS;KniMwP2a4mG@PM^pDa_Sh5rky zW4nue*~&%#V+GqC46bI7Ci@({DCK!OQy-h*T9tDbwN1Ga0c?%xv!FzE%JI0llCn~=Yk4U>*0A;7g!f!{dD{GAIz z;@?lZJJ6kqFpD|hW#VOc+T3rOzT*?N*3p){I9C~wG|zH0mBm`NXsW9s#|5{XVpO52 zplPXIhus#Op8KZYDp0n_Xs?)#vbt)={EUcKsWK7t7Gg?D0$7Q5m|;~Em?@b;%9z87 zYb%D!S2kUVeV#gGd;(BQ5PFti{iDU9uN8}L6 zmSRLwxVY$@F*0||%q1zg&BbP@&EUo$U{x5K5qlwzj)ITqP}=P^LzSpHiOK}s-t1!nI1iT z-IxS?y*OU{jbD4ibQ!JQO&b0>LYpQQ?=e?*4jD&00Pw2=(e8XGiSvIDyCb8=?`tIF z8K&pcNx&i;W8$M`1b~G=)JnJgxSvh$mg!8H|5IcI6-|Q-n})ogk!;YPvP&rZe#&5x zpU$>fEK>ng9OOzX-4-g3X$CLT6=Yf7knro!!q==e6M-HFHtcnr z-<9furK)>%E%EYe7b1rcD?4s1iwnMHV);V(MU2%dS7~bSMOv@|Tkr(-q5kG{)sygG zzk+m5rzBc-2-oQHqR9Bwki*y4mJq~_G=$zyGW8jy)7{ADvL$YKkchk8JNndwAjJOp zUx52OvZlWv)Q1pPd(siSl8Q465o4;RCJ@E_oAq1Oq={Z(ntsv^>M;aqcufPFEy6`- zi5CVP1&KYE>qNe)c}1o5FboT2@tzou+t)d=b-4{cXXcfqI|Ke?^}?AQfOK~UZFfz5 zH}Yjatn7UN-Ce-ytBZeOHVy6jHZ+=nX92=4HiJKVq400*^4`psc>H7a*mn`U2#D&u z>J_PJUHJRUOUtf^R9%7X^Q>SV@q=F3v5z}{qKO8@0x*{R(b$4^-1>QyK9{J+lQe6rxmUM&jmp zbp82cmp>hybrKnJxFkAJ;e4bHai)=%{O!)T=o5}3@(HA>doKWjaw`45{-5hX`X5ys z_P<2}j&G5G6R0uhg#14>pyq4~?cYF0>{lV^Ke~VjPb=5QH$z81l^+yX5%3DY8Akhx zp|w6*)=EjP5|mu0jK(>nAtr^46NY9gQLM{mC6{!Rx@c~S=ZJU9-cq$X^;!Ju*Aj%^ z1ZHI+ebFQmo?k49AbjTYT>44C+vg*%g?AVKX6%!YzQ*y;^KjgKVWdO z9k4Yvxf&>mkHy?_paNN{2!J$%HiJYjY3Cy8Pnib^J<+j?*vx|4%T9~G1Hwgowa0MIz`O-`k5ipytgLChjrM1r=NWp}%)r3E1?O9XLSlp&~qi zrL)(Hu{Hs#A>b~~UN%dpF-6Nd)NM}p^-obGvA43D{|#oRX}Y$ZQ%MfL@M2e(N07l; zY`*pB6NKzQ0ayAU=ARkb@LPr1Uh_2|$E4WgH?ywxWCv^AV-fomI}n0(Ny>^X_^NeU zNT>X+gOM&|C0^Y{8o&?GH!**<5blZ}%VZ=TQDQA~HVQkMMtSO%QF3HeW0)S(_q0sM zSCC7E-Eut#l{sq_Oq`BX4ME;>_0*Z(qt&cvxXH|6bQ zpwuW1CCkz1`Gu{c((158y{^e?oYVByi47#KyTSNiqH$4H2{4sMuwHz&a%o@&6(RA`;wMt}jAgtu3#}y+NznXEXMK+S%l><+MGFErZDT3{fmlGISqbLEdsac&@dq@l=ubiXm z0s-@e$|aE32xw9DI73yeid&*-l(a{)$V=GswGdmNjRz;B{t4kwh<wE!u9sM+hi4 z{0$1o-8(_{&EHMcAe{f$k_2PeGX?7%pn>*{P6Z2C3s7_Uz|9DtrEL5puLIy6wL15)T!r!3t{qO?DhYb>mZ=^IxX#r( z@fLkx->*7Ym^RUmwRKdtXF0>cR*!f;uuX`n0`l{DAcwy67hqhfxoW|Xt*3SUwO8C& z?yrW68=auUx}6lCReMi_FyPYoOq^n^+fttw^GSEWnH-wbsY9yTc9vAEqpFjqsO8>N4;I5+YPQbh-OR? zd#<}Ui%N@Lw?-Ka8OdJsoWKr>xOc@VW2xQF!3!9^ZXwDe? zLm!u2mDQF;FfOj+O|y_@4rV7f&aYaM>bmHh9j{XaXo7cd!2rG4qgrheq#kZdTvAQ` zrvj+f#TE1n>jJ)oEob0@&J$wqt!|7t7yS3i)9b;^GK9o!ygy$vD2VV+D16_IaR3>r zJ}9sa=nzXc*^78buJ9sH1{l87Yo^k)&jX}FG29TyLmcQ#exkUb|D=VaG*dY!7n(|- z)k9+3l0k*XuR8*?AVd<-CBO68*HL99(Xu*54XoDtwqo(*r4)~#%+nrw=}BU|`(jl6 zise*jFYVUh^{(pN{rFbbLoku+PC(d`MN<%5{TP`|5nqLpvXFQOFZt-YbQ?XIYGKc) z;Sljz9JXmZj~ugPbWACGoGp$MRd7^kjjHV)eHAs6CK;V77Q^0~bl6>ln*49A#O4Y^ zdGwm`fyT&D<9sj+8!eYbQ5&-jW1@z$wu(3`2qW8hy7ysbE;L0nwgJ1yTLAhp5s`Z6 zo}HSy2eZz=g^{gylL9TPI4~M!(G%sNsF6SEo*k5Zii3cV7$g*gKh7uI4)NAkeC5dt z#UB0}MOW}w|6WN8IFU zVP17aK-3t^iZj;T-uA>MJ3z4BXHa4X&4yw2QFr9&4QYMvF9SDe*O97Cii5}irR?0s z@J4B%r(m~E|2b+JO``U#^N{v~e1k~A=lnJDHYe`Zp5qHRaV-C_KHTGCcG;44;Q#KmCj$=uZ|^%5u={_%;7$kJ5BBc{UKZfke@UzUZuM_t z-=r>EtpDq6#N~sg+Oq(o{huhP29yhqInw9v`0fSIE6$ij5lNXO5xSOdh+t9pK~u~= zJMlfznxi9hP;-|eyJoe@)kIMoIHf`j$R2gmrn+#xfEDuEQZbaeKRTs2*QAf~^B_W#qwHuL4Zt@XP8QHKRctb>>Z>>ZPGa7aNOB!!C29vypZ z^5kY_wt4u6VMxW@66b8v?hhkdIYhK}i3~{JG27`~_tUuti%ipqc1a2Skdt<2(;J+u zMZ4pMqH)NesKs!s3{vH!+lNEOc9a^z@%UipuuH82x4++=c87`GJ|Ec->Dr2RlW2pz zLA|4gn&vzLjC8gGVuOA%@j=~;>Ta$Mcnb_cka1HCD6oLE zh#r)iP{vD6n| zUWKH)PAO#8i_%>i-QkJTM)X!&`hqMEzrt7QzU3LCI>l(8lpK6yFs&koi`WY3G9l;V z(-dNbr+{f%f#|S`8rZZ)qSAV-;xuZYsm1Lf6Y@KgV3M_#?kRUKzTRmDHrhL0R zE7cPEwNWfI^y_J<*5sB2<(hrP9cE?OYllEwUX{WJKRp&W43k%F3J~*$4qW0ta5#`> zLnqpsUgy&FTZJvCSvX8TYE|@jw#oVQ3>(rZKGQu!qlliE2Z_J5xQRs2ljsg0k(p6p z$2g&7#TfYL52#idNFv~|vQHD{r>B7dE>LYw{)WkS{6r)n_`uc?=252|8451YyeaFx z#5EX+>u1&56kNQ7+vB_R>UxU$K;Q{Q|23d@=zlr_9Z_!c?pgZzmvx59jK*p zMoHP}zh_DLjzzC1JBmJQi=!z4e%s$_M+9RnPnf|HjBgVkxVZ-*+ zjCWVHGqXRDn$a1}th`G#m6y=Gj=?Zi$R7pW*z(HD)G=UD`j7sAG%+0KZFk~AV zfBggFHN528i(gGbR8{n-x$(?3m0ix8EiL&-cSpXo;Iq3w%>r%d35s?DTrf)`GQ302 z;$l;r)5;xM#AoS})Cd`Nn@L;To^YG+3QOGdt1i|D51i%Qw5zHMyIXn5`>1O> zvQF6^s+=Q=xzk-Ianlw6{!Jv<;*}GXB-ys}Yx?zMt%*_<$^HGr>s9t03yid%MH>m~ zAhqgLXH63;PQp|HmeQ{_VhMEVam{ej%{%9IG^&u^lNrQ|Zi_7;B?*&yqhHT+=XU|l zm&%WY zs_6yuxt~#mVhz?n#0>Cj1~NYDVg%eyp%$4=6hZsp<1fu=J6Z!*Dt&5CBdQ&$_Z)|t zugIIEn6PA%97sICpeXjK^Y+zDwL$XF-i1xpXMGuT%-zy#MC*D8Lm*X$JDMmldf%`$ z-tOVco}W7eW6ysw3budvpC1R72@p)T6efrXFfxLu6T0Ik1Ml%vv>~-SIv+qq37#W# zh$;Bs-*4^qTA%f%tSO30|BXw-`_CP2aX-y zuc#xmpVMy`>g3_?B)}Wcm`8f0_1*%NqKl-Zf?eKLS;#6~7$jw7EjexK;9U zmTrrd3s(hn;ZtLYo0R`Vu*XIZk%Y?XF@;VKt~x;gxV!d6ZMM#y642K)5)3jUz3t3fT$uUHYj2S!ki()-p;)4brYOv+Rlr6S?Av@kKrt6Qw9kA> zU^t)#7?(q9<8S{8B#{T(!bh1E`$<JN+x-wDt%BGFzR2ebjlXZ#Q(IUBC*el}-`A$9lxXMCSzmeD=Eo*WC;^dUeo+)cWR0tzqjf5IO= zl0~BLXuPD^;p5AZ;YcuDV&o+HQ@5Xl7oNQUa$j5?S#i*P03G` zIAdXCj+I}4_|gVKZw<)_;@x?I1-)hV4rmb zOiF}ye!-AT{boYbYHtj!UU0M?$w%xZzFVebdZpZNMAlDy=@{Y|)4W$Eta(TsB@GULhfL&Onc(Mtps+%sF2A(r_W_tMtQH~na|;A+g#S9wKJHhAt*XAk76y*!j%fswt>7r|o0Y5CzS zhSXX?pujFU26>Vs8EQJo6iE5YIVA{0jnis2V-J9vzK6%|Hsf^JH6DYIZe#PCrqg4Q z*?aJqKZmkWksfMlV`S{5OMn*J^*u)O*H#C-Xa|EF z`2^qssj|%FJ;#vL+I{BY<;THA>`P*ddL3n5F_uOm(tx#q!O0ix+zw}M7FVIuo@dMj z_h&%wxjWj5%X{?AT+T}5XkicatDYQ~zqrPwRyP`(bW$X`n-83^Z7ltl)k29%o0+U6 zszhzgsPK_C5zeJ1@Rtb+b!PJ{M1BEG3N@hYw(KT3i9m^}1)<48p!!_~JpU-PpXz58 z%Bd}xtJeMvh}&&{u*|849O71P8KNR|3tqz(&tuOzVr~YHqm7G@b!U7Wc#@Yn%7$g_S^YKh%4Ic@s$>jbu6dw$hn^LLHLZ)i4S!7JdLL|T0|S` z{ch1w(zrYAi94+S=a;Wn9h&Cb(|#}9pW6iv;CY{>^JDY0kyQh;~pbgMdIXV^=C`*xV=f!HPU zK7)W-o~}qQl0C~l8Y#9{xHs6ijW<;J8QJNgs>`jT`9WJ_K(wlhX|Z7Bb(NXLN`CPo z%`XaPVvM`O;)X~pg| z+NQG=RA*^)4fXZ(F9yTerg`0A?FLH{+al_=Sa7F9Uw;d|{vR-tqfsJY2z>MO=z2Kq zBF()*u}_nK1Zi*CsXt9!=nGMp!hawaih$)GfVa%;gM)`{5Ah;HSTc)#O|HhMpjflU z{GK5jA;ky3>i*cyJWW+`4;^}ebCKY2LJ1{uxH&<`j%35P+Z}hD6eDFq=oNRiT1A}AUd(R$n?0lp=WIOjX0thH}yx%2(tWO!_ zHUTlYhr+}1#jokC{He!emcB&O3}{`KBKUISyXtawzSX;ESnnl3mABdycEdOKju5^K zgX-BYKR(l9S8gdRKWV4+i(VHc`o+)ouP<)zp|D@d3SX%KUnw_V zsCi(u7`r!^@yBpr_Rv$7BLb9usg$`5lz_~WF(PL!FpJ>}=%b0KgxFq1J>Wo2G-?G2 z)v~$pWmaKAb86N|s6`up+5|Irl%n zlcX2_2fO%gyiU?{qsXlsvOFCw2%v>(<+?AHmC$br-OT04Vtaz+*(!7;cUg)DtxLbF zF5z6vdZTDjDx{7ipl|^v46_3I5g&$5L|X-gHDAY%SiOa@92JJo%>I1V4^JmU8Nu-C z2G%S_w~lvCICVYup({p-y!<2DcW{Ty?*bbnKPRF_p^TYp&38jR=jAE9S2nvt(vV?U zzl^aQh(?!Db<%pO8^XVi0Wiy%!SC)MI)bH}ud{Pe)usIDDI>`K<^|=f^vmW^yu%}b z$PSY~6I>ly!Iks!#(s`O#>t9z?9~PiBX)(Mky7T^ZVryulW5OiJdLCj_M%X0l>jcd zk~vRnv{0e#i+LNPJVT(}+(e|<)&_;GN^qc&42zNw?yKAZjtJ)57Qi?ENM_FgA(e`5 z%cg7VDtXqFO$Ug@pTGL(lxB8ri3(foL(g7F5^fFt15d8Aiu?#sV-{&28!nfx zn^n&Vg{U*_Bc^gQzn;dcSIZuyutac_HagSoZ3J!=4e8|`ba!%j2l4ksp(2RrxgpxF zkOA|s&me~*Jcf~R98iuzk%LGaJ|3Z=`ja^!rraZ3)tq>WJs2d&BU_7gZi|_m9;HDV z6Ud%=sb3X}LSv>FSK0a*tVvJX*IZtvQM0o|z7@2M)MrdS!bwhQNOp!k{&Xu3C!@C3 zJQY2r(+)XrZb=}MzbbMcYJ~JYR=B%3#XMc$ijQs%qgmrL8!!cm>|jrXqNC%WIk0_l za~f89;V=$beo$>PNu1JDsCG<*e2YWp)15Fa%@PIF(CjzI!`n_4yV7LciXIk|5XmdL z%8uvtJst~}_|vht45LJLY_F2gW}t;z>wF};1gwRx6Z4hs6IxR9MbDPKkn$Dp%WW3# z16`{*B3!GM0}?yjH`+V{TMZ?KecJRr5fCRzRW-_%WILof&#aOImI@ODiIP3fu!G&{ zgcRYYs2}I?lb@s-!eHFEI`Lm+O zhM&DNsuB&o1aoD}ZmFIdB(ih4mD|D*k=J&u1QY@W%dz{F+cbSIb3HMhDf2kd& zVj=5K%+IoGR}K-vlb1d0A7iiNLHhl!Ss;cGHhL((b>HsxQc!7n>c@-WJXzr>b&4U7 zJBGW>kqADjk1HLIc(G$8Mt4z=ZzcT8U@!!TrPi2j|Lue1z85QJ(JE^_9w2ZlPX1GkQZY zb%)Cz{sQdLYc!*iaxKA&&+GOZYu{{W?VJlW3#;eyKjRmsNce+pQv`)zd8=j_cyp)O zl|14Mw74tLzpA=W-X=&4AEgHj+Mkxsz17r3uY|k9#)siHDvqkwoXOcZIuE}=VFg8( z9BF>`)`U>yDC-~_7mF%qnW!9^TE_V`qxlm{Pbt!gP;I?%l#lt|r zXie`G)m1+G^VScP^CwGiQ`U5;=$fp*SsC{rZC^>akZ?Nu^Q+wWp{NtC*64>(sWUn_8T!RE0%*Epo1yZ>L)X&UQQKLXHn{INW0w51pAiH-TT zraN>-nr`gIm%_O#dB}Id%kDZP9v$Q5cgN?D=9oovMj17KOV696xV`*V1$q z()b*xKyPM5`B2VJ_! z;)Slo-|6cuth0#!Byy#SZ&pd%#2qDi%ls`^16hTZ>qNj&@v|xG z`foe9pB32P)^#_Ax4H+1WRfLmattG;^*7r30U~(wdBpu5Xw)(k0oQk`bm^N5MYKt; ztd=ufHTtRY`eii*S+gWot3S5sE-*oM?q)b1!30}UDs?0}Zs{8aOL>X2bo-U`?UEZ8 zDpKPMM{7wF1Qt3FLzTuMTluLSIt4sKFS>u+L!t)&qd7?gW2)x3YjXD7zFHi6Q`l>c zBb#jFmzgbNF9bd72M+{maDAq&*@08nk9=#{SRjJH7v2!4mOqde*1~MN5Go+g?@$5+ z$-9(wJXIO_Blpwt90jA-3HTya!=C72rdaOh{v-Ubha@qekF!wH3Bf5LIT^N0FdV$X z#yVd$^?Cup*CMzkY@pC?&MeVw4gG6?n$>{Ie! zx7E-&HT46c{#Ujp&oOL{pj42PFxDjTCYVVgf<7B@P+lSuT%<%iqSSrT)g|6<_orqT zFh{Ro`vtIr8`DX!gKJUz9Sdl_loPl9}e2_Iqpz7syS%ededDoR7p%i9h6wKi`fG4 z^{ul8;2yz~94;j*M#C8nJ5VZAuJ*Nli=Jv=cmLcgCCf@IhrkPcR@NLwuI`ew+X2>K)p{#5kvN?KKd_h(o z7Feq*o&GKJhg8v^+$SuJ0h~&kQ)D_4zD5TuW@`L?wUrA8c!OVx9nTk{DTSXKax+06 z9mir{KP*tXY4u*2SNIC`qAiH^vVVciqUljAT1BCjK->>XiJ{uAfbOCaBoKq|#^`N! zzhcR%HTGf8R-bg!UMpUu&HlIoXPumMw+vkBC1{WN;ouGrQpNLi1XH~UVDf!<463c~ z%y`ft^E|aSy&uzCBmyKR(T9J;B(s(c!gSMCQ4*gCG=#8WtW{s;amJ&d;X5Wjh`07{ zii@^)O$qpn-xrCETeT%FE!7q8JtsepA@l$~jzRw;^sZI&#ef%oyRdwMta{&}H!q7A zOPxVz$`HC1#I3G5Anbu8=8KA0O$D3YZURrzcM^HiiYe&|=1mvuy~cZQ|L{BxvvBY! zYdx7yjYb+G+&NLgokyVtbWh0lTGz7c*}8zKi(aZjow7NcdAdi5MtoC5w1Uz9OIp}o zKRLXjHFZ>7Os$C`LP7~Nzjirj3Z9Ok0UQQ)D@agG z)!BDvKzMwwQgPkOBLo7iewUFsJqi6o>OZWKH)W0XtbWsDw~l+lHxFIuGw(Bv&*5=V z_&$l@9T0bLpp09qoJ=x#YdAb3p~<}DL;FNTU*D>C>?2m`Fi<1?dirIbv@-GX3W4{( z!$N0=`iQCCLP9;?!abS6nlGKe&43U=Z?!p-HQm7Z-lH)x-N5tSIg`A+FKWtfL8Ll> z_*fuevYibiUUP^CxbDAbk5kY8Ay;fx_i*puR8FA&C0CkzyumU4#epmbg1`QY16hdx zSNKQKFhqlk|7-P51lNFhA9*zk9Os|FQw})#zd#X`0&uo}bdF>xc>nv<^xJ_`u5TmO zpe5U)gOfIYDF;t|FW}p}UI!iv{F^0E*x7?e`F10s^_E4z`8P|TIj0F+`rp9U9pI7R zf8{C@piMO2b|v5~^5i|Ewz;MYJOKDNI(8o5it_#K7Dg&rpbuOSq^XMgR*)Q#6dUz} zSZ#xX#u%9ebnQ4>#W;mSGx=o+_UQyU8UYjqfrSJ@wiQ&y2;|`qtI|K9mUgmhlK<6(3ZAJ1CKf^PEne?vxucNuGNp9 zX+zV>8z&QXztYO40=YGzpfu5NrX1FnkMJdB$jGbV2hrd$*YWKWEIDm1cuK73Des#$ z@BI2knLmO$uek3n!JU92KRL zcjzpkP^rzc9IIsec=~fBE!l6viYCjwY?3>F(%re1;*+sLl~Z|;dw9FV7N4g?AO?^c z8PR-6tfd@5Wx-ZIf*)?ardln)#hxr*!DA0hA|?r5g$L0Rhmk2okda>HI?&|GW0N~! ze{?Fd!e+aH=d}@xwhS{SOQcDzrJ?B{6N6GQs@So{zTDD=xg2#PbT#twMKbmZh&J}} zYiHsc)P~6(eZ)+jz{O)^-2&T_bB6h?{#V}vSM-`N!^ zMQ5GRhm|Q&=yXrQLnFwniuG!*t8u<8AS{rt`2i3g9+^a?D+-ADq0eaJB|$eFIGlB* zGp*C9Ko##1X`N8N`h`!_O+#_KPI7dQQ(7F2T>)gy)|Hb&h10Ah`*TU)*RKznqI5y# zFKOi>PzIR_^_-EIi^=z)8xv7t0zG96^DWt~p&b~5Hivrqi7eKbx@*%cI7EZ4ak^5L zk*j$lTz#I!QdB5W24SlP8}-MUDZ~KL!cFPbzs>mt#^# z!^1!%xV@bzM6Nodnm^;^?zjb<9!^xy@K?8s*R(#F!i6R7Q8y`?v5Sas!%n>)#BlBz{;>RaQWy_MN7g$;F zjZlO~lLcaDOu4xu3WYC3>Ipww9?*#%JU`)rO46x%!{VM{?dnZ~OL7GpqPB2&V&n7> zL_cXmvhej7T5{mH`xdd{kt|;ayJh5VuAX^^m;`y6VXVOf@lPxCi~?lVS6@V}czdtr z4mKZ-iQr{XctgviU!b!Cipc`VoRa>K5V>O#M+HDPc8pB z_~!e0E=G_-mMouEBZV)g4}gA4C2n^GS#>=*r#>_9TS= z>EPYYf2)nF@T|rmc;G1Yy-rPyMP;4*LL4=7r?k_1w{I$D~FY`f|){oex ze;m1e-rm%l(7oPX^m;@{{;3Jxjdm61yUFY;a3(>%uS7&#T2{fNbSpunjCc_}N}{19 z&ZGq8(dUf<%As$gZt0}QigLLNz{blxEyN|$6_h}H%G3Cqg=b+(i3Fk)SB{TcQK5=W z!nPO^_s`_i!2R7Lhk3q?3IpNl{K3Brfn^l#+#gakZlxF?e=FBUN zjAuxxY<~m^meAqvK9GRg0mLAdHtIIx{-COJ`1-up?JnuwuBx7@3%v&quFFV>OSRNWWSkhv5AA3U*?l!j)?td#QPX^^4$|CWGD@I$gI{fhv3xR;cYRwC z1@RhQY8yDJyA6l3u^Kgp?yFG}UmD96gPnb?iV|KSDe!xx3?b5X?p5f#0ucGUl;9?hB12 zrx=$deIM13Q+ojEDM;g%sZJ-zm~obc32+jd2A0qBme|KgFy8nemap`zP%27`r}2y! zl5B0lmvib^3d$gwd#+1Yv07>c`L3zdTaHL!Fx_*iqg_ups`SWelhWk#h^JCE4JL~6 z3w`|<8b(e=LB&N(Mo!iwQOYp|9rpc(XEq|U;iK#$rRxpIJ)7&3kU0xqLITSTZGTUL zl?tXH!=)2C^j!^09Gs}Rp#1&Jx9mFO`gXfDLkS7Cfrp|Wo$aNl&k5cROg`ZmpXKP4 zS2zS^(n>~uNn;x-J`9R_l;U8s>BO9soBhGqS7_w^>IUlKq9xn$+_@pD&XY9kCaL~) zwmc~d-d!#L3bPN<5c=M@fPqE7y3oQU0NxyHBW*pGkH^74D^s4fQF-#*lI&KjBpz<5 zj#hXSBw52z-J2*>_ferhfU{9R{7#~yOarz#v7q9qNm#HA<^#t512*`dT+h@Ue!<(; z$3>LWFJGxL^QbxF$W4ok%u{-8lMGBalpozncEY9xKz^kB!_#^$htF;b66+Pem{tu( z)lccPlzha$ax2Z2rtoa!bz=5928ZxspxI17NR)nl*ATHnTQQo*8IsDY# zMNwz0M?SkoIcp6XQjc$~mK{bTP0}A7JOJLc+G`@Hprqn{X=9=$ln^FoNFR|%&E#qs z`L5R51Ntn?=ARf|ftoI&GKlx7+=SI>=7|SLkUe6tEQP-``c2)cX(S`VB5LGhNlT(> zbcMM_<=c;ND}r=-Cd|XNp0iJyJs-m=V&`;+KBAyL<-Y>XR|Vl3?Qf`U?BklrJ55`6 z%EE0NFj-M23eW7t8<@pZajJ#te&f3pk9;(&?*XlSvJ})H-JU>aaQq$Zzt@Iv9YDiG zZ;7H-KH_Bs)89d3(VVc9Ul6n}kjF^HT>gdDzM8&}V4lkcXJ1{LWO?0eX?^4vXBY{W z9p=m-me%yY8a1cw5=N?ZY#xMUPBsm1o#XxK2ufVOReoH?Ss<=!w8|WLsgO20H4hYO z0j1YH8Dwx1M-AnN<=*A>38|5F`fkkq=A0%V(#(ezTEmB%P zPIVMd$!iNsK>irGQ9>H>C(k@)p2gWGQHu7=2PiL7i0*LLXFlD~iHfcFBRpDvaB#uG zFsR0ZNEa!aks+7+XQp^(sFq}?nO0J=H0YqRBGB4>Z-%5-^ zN$H~0aoDq=fY>Mr4As{@ZVIBaL~pSxopCrt z&x_qyV)&8@P%9&4l5h~_dby?;`|_oTwhN2p^s%_bWj@d;RA1(i=xFAV@=tD_^ulA4*TPn z8_QYtagOLxO?XwU@4JZs(IYo}ZMPgH5gYLey+=Ml_F6x%PT{RjutaUDf8yyR_@@WVIba<{7=7duPsk!VXDEFrsjhzFF8K?sm7nOi!{hKyY|%!a^~FN}`J6)U+LMekzTmNta0sUMl2 zYx)*euRpokSF*}u4B1_smD0S_8LK-yC~nx_#pw20vL!EWR1<@El}`|1ZJk++=+V6n zE$xprGRje~YUYj63uJow;4tu}E#$|=@Ht|GAz`>7marB<<-t*5W=$Z^kW;p?ZMh(A z7zpO}$lbHg4reL7l0sblmUPF%=G44VsKE>$((~u_i;qKW;EJ<^QaDd=?G;@*vpLo? z_MK=)o2EPNNxwfw3&Cd!yZ;eax{bFq!4oxZc7<1lVvHsiZP`arA{3f{<+u>z7cLr-o60TA|yrW7i=OlBDEM~^)Xwu>~q6DkWn0%dZ&eOe=T^ofu zI-QQb`HXa1Q((GLWh0(o`dry5*8})t+Ed#-<2>dz)wm_KeNx8P)Gkr`7eX9U;ul;bYg@o(D9AZgYGL#R1SR{QSL z;I-K_PZUJ|EbZKp!LTL6*a&JiRl1rP_Y9oN_iP9v7ftd6jUGKEN13#0Lffn8XWa^9 zz-*1N{510N;PFjD^#8@nLTgr!*DuIhKQz7cOtigT6z;nLlX?4d_HyCbU#?@hEN`~g z1^<(*_I=ET7xHxHPr zB{SQkB>LqbC|J%1^}3C=0Yrn)KiqZ8(Usb4DurWG6CI zhNCu_tI_id(#sbCx+Of5SX}Wkz`tOMc+L8#KLfoKmEcgyi%Q*uOJ(*80bKfn%8VVn zDxV+QZq39FC&K7%0gtV1Jy&IMt=n6z<617EJvAXWnNut-=qHGUla$jaH;uYSIN04L zJ0+I^kU1xK_qUFa6io81kkjY}qgq^8gC;xSq%lZ}OM4-uKhK*;9+SRtd5J0)JXOD9 zF4{bqF27kg;LED_&PmB5;f;1c^GUJCp`12J@lI06D!Y$_pBEFWebuMpld^>XeH{6$ z2y3~|^@02KW_=$%{T8;?0LVKkXP&=yB8>U?hh#o!SJ&w`<@eN<$cMtv4bzt_J<)(yb(;NCOy}*LVQf|D>OI0XP5PB|qRA{I9r~ zbV5SroA_XImpFj2nLrRw^-pG62ypxlc8UNV|G`$#{|`2b1D@WM6jvve{9j|9O1_N= z{hy~yn};Hs43Yt`HVa4t*#4>TlmXcOgBP-Z=6|qM9zgSNSC$n4U*BK$%S$Imv3i?b zQ#?H~QSxRWByzKs3c&kcmvvP@*FTu84se8g_p3kZ0tns_hc1Bf9U18YXn?=O5}V_7 z0RZs7C`At-^WiT#HUSU<{~|aGK-fD#w_^%Xo9-su`FIp%9NWlO7NKy}Y{@X;<006!ZK;8_{f&AMR+yOWOBEBi( zy`w}%gMH%)W+9WSD}h+ei9G;{_epg30c42&N+a(AZeji+sB-`s-23tf9sw+W+ms&x zBhY_!{pvA-Hh=TjRS1)7!XdDdzdQnos-RQ8@}g!JSnjaaP2L#S-OWF_v{ znT;THit^4lb)@O^TAfU_P*%-FKGAw-&JHBj+TyeajTL3OjCx4A^G(lKn`r1YUFVs~ zRqZ#7-&*@%sPpM9CAI(A^~m?`m^iMu?my|KCNW>{Y1-XPSkl==Zf_&i6y#zfvn!wB zxp(VF>IH2_*{(|^NE|CMJUW=qbTL98ceW`V=M+7@3#PK)IE7!%o0@B|{g8sfg9Cta zzj(!h!6id9s#VA@zIZqlEo@YY?i3o6)lS-dI-)n9lU!-klY2i6?|7N)#y=tr!<6{JYWvyR1{b?li zIn=(V@Kkl-$F|7InjIfqW8H@8rRBZy;uBp(rTVH@RW(fW%qQPUW5!-oKODb82tL1k z2x$m6HQ|ZqReQ785Q01z&*G1jQHEkG{%s#cf_%&c;f5IKJeoJMLJXG%5b+^Fu*l{4 z_@Q*X=*4Zqv0g?$j%x4Ci2SN{e%ndOhXyt1buJV-!uTwSK}PPhP}!v}_iq`{Bj22^ zBb>ffDvhAoZaHEoCeJW~JE0Pke0w<Wd65E4fDk}x6xkxo(tiCD zBRNSUAOh#$N1zZsRRo4c2lJp1s7oH49HAZuE!Qn)tW83|qjc+f(q~hz5g}R9nWfA+ zkBRBkXw9)vjpAe6@q%2X{+8lrggKmoe$4zRf)k=){9Ze5$%dsuXSf)DHB-BcrQxH7 z>fyBG72nM{lu4$q&ILD>_m;67=la4h`90T5#u6;TE7v`F6ub4Cp@0NqkW52xds|94Ek#{EJjhKOIX_+hiLg$Nk)~Td#q?7 z8Jd}o%s2ZH!4vbXsKjk~01f2A*P&Xh#&1ncEIClGU2^b_S!3ApE~HCmS|k#Qd2F7w!d zR+hoA2?Li=GY#V`W9HKL8CzvUj$X(g{&4I{1RKZLEKa{rO0nFy1@t9|l}U?IC>;Kr z>Ct%FSdBs`7(NqL3K&WhoRhNU1Znam5Ksv5S%9+_&yN{~PqYSl4K@S#c0{2+0+ujJ z$Q8rQ4Pf@D<$z4RZjcuGbpS~LB|Yrk)-CxSC_lg$%p@=vfGJQ2z?{31oY#bFg>4W_ z1>gwaj;j!XF=MoVAgmxbA&xFQQcAwWcxW?-e?z9l4g&Zw!U~UU6dKXP%J}g>8te(+ znNY>}*1}r(&cIp)2tu|A$m^l0cSU&U2HC@UK^A?s2ls{mLPVqR2wZXd9XjemnVQdlrDW9 zzwknHFW$o4_;9eT@qSy^UYb}&BJ_aGP+dc0*3zv69r;H$_p03ybKHZy7|K!B^`4MM z?Y@T>u4+wlW$GCmitE>lD-L)|X+cYL@3PN|X83Rnn+5n1nB zb{bb<9PYAD)MiI-9vJeuEoNv94U5*vl(Cph-oZSP*H8E14W?=u5xd`1=p3|up$HL0 zZSn(x{(!S#F1rZ)7HDyfJhgnini4YGel1pu*r2~rLYJ`lmBTOrZuCf%A~un;g<|IA z=Nz{+nFS^b34;V3Kk^~eFge2~x;zP*0qoBT(xv^P!?6}_746X8YHcPp-RG=%V14JH zV_AVivr!TBt~2|dyr&WUUNRD-ad?QP_rls|&<0{qCr-@tryicJX0N7Z0sS_F`A_mk zm|7x+TV1>{;nJ7d6`T#Yg%B%9eK2#MbXbE1kIm!260JNZp*W@EKvf8hRyR3|KVd3}G3kUjrOd0gmq%fmNqJ0vzJ7lG&%`iSzn-5e16 znilbWF9`PISAO zKcy5(B=gUVQm4K=r%%$y!fmLI?w?#Ym$-Z~A}X`36MWfe#|!fcQRCGuk@#(bVj;h> zPSq7GkIjyk8#;L?W2vT_!*s_46?&Tm)0|5+7OW`LZ+WBJtOnJ)Mc%=o#UTq1HG!-d z5V37Zfa$K--Plj;j=k*IhA>QBW028mkewslhPVgcozZYHx83M*LFth{5&LK@pSSac zsPxYWxWo&;V)2?&9K*x!Vl62c@k%}}vo}mKoivVXxDu$iCb}rB4Ku}Hdw5A>kx#Nj z?ZVp_u)wKtW-(R^f4IQ!1?%FM*#@;@@@9|nfM$0a-F)_^eK7M7%_j~t_Rl`j?le=} zMSH6K;gH6~Ji8Xi)q=43{xzDre_o44m`XL3pxM4-#DVE(4UgJ@H46gD(@g-Z&&=BJgR=HZ3XVlhVI5%P5Vm}qU z&$Jn3r4)<6qini;Ye0^J>`xAeVfn)YQfNBiclV*-W-l?P+~Se$5t64vi9m&G0-Itv zgOl=RF^YJKO+20X5!kMu!uNG%M(XNExgAPj;T^YiaRaUQg6*1p$v;{Qnw89kgCzt` z^JrZ-szmx{XGh(yV6$^#)kK{?|3s_v z(F*@Wr1BiQoz)m7Entpe02N2=fB1tzU?^J7XnIBU+iJ2di^S>(@2wcv$6%|5*ZF}n z?OrOOsad90%UFg>#Ew=xSJNp>ot3Q~O|hySPh#z-ylp<*^j72D0w;nE!OQ8&Ps-`p z?#E{hm+dU;FEx@`4fJgZ#bsGO`ZRZg@j~8?8eZX<26J_fb({@pm>{Hdt)?IjJ;X}5 zP~I~FJ=-s%w6&%KYF-Y!09UcM;!NHY@hWALxoaj>He{&(NJQY?+)YzsppM z+~fAibQ2G7rRhnw#P4t>k-=+c*zC`l8Gjd3!TU(v6s-iMt<{uYHon51@}uBf*(Y~d zhv6g9FEkEvYgsI*`vDMZ@!7|3+=>v;_(drsLT*B3joYP4wLw;Fm3^s6yy{BO`${65 z?QK^#VyjZH(S=H3S481D8iml@3ZF$7=bs#Ep162s2GQyP=89r_UbN1&pU_A12^^@o5NZvX*3R6ON2 zYDW5$+LP1PQxgpey~`v0|DO~6pHte62;2gq{O1Ao?s0u%eY=bO1`h^Clguat#BZKJ z1jKm*-92kGyJS}RV=wJV{xrhgt`0j1;2>^HAy;Bk)U=!?Lb@_v_9d&Ov#dUA$ zSpKR@hLQo|fizule_;F`N}CbOw>L15jSH<b4hi1q){gtr=y0(PyI zCMMxr&LVE5K?m*#LmRje?x`P3ccl@CxfV1)>%59d@XY$W8lEl zI!abn_mR=swh9~KCcbo&3*PeH6{57VMQd&tblc82((>Kxbj$7m^)hpyM{ni)z@*Je zE31u3?FqbN?&e<6x=Mq{HLa9Ry{6^aDqTB7zA{H4wv{n|vv-z3_@k}yavG!Y6!+jc ze%_e>49GUnvqgF7!`Zj|RD)bCJ5kcdXpg>HG?8|lY=^T7MQ$TgCYc|R_je0F6D0?& zE<5JZYr{n-Ry_;14m{CJDMTq`vIx_0@MWbQbSPIiVQxYt7IM8P=4A0?$w&im`kqpk z#b*<0l{}oj`jNMoCX}GFax%tqB&Ou$iQV8id?N%xRpAR3>RgF(+P}RfnAO#Jpw1K=h1|_V75?&inWQn9jWNo%KAxr zV){pZy-PDeLx7MbeyT*N64hd`u(|1(MlUFfp38YC^;FIR(OFutl>0(0^245UPCVVf zSDkaFWHJDy3gxIEXFScKIiNkDi;BOOIAZr6kXR0v>V-%6MHTF!^sy_f|1;b}x!adP zMhueA1S;1dVelHpo#Q5@f^E04e zZ!Vg%>6vIYeBfKU^64!%$uX9J1ARH;TK+Dy5k69$swkf8G1ic)Gu}SLFv`3=pe!nW z+KF^)k@17bFxA=oHKn9DrGV5fO?+_*&%~ZOxeGFLim^k~D6`Zg)HTFGicyMDDtc-{ zs#5BUoDi%jOGJKwKI%^#nO*V7doGX^9m6Q&C^Hp9>&PAy&Uqz6DPt+Ky--RY8F_N5 zmVRB25en3Zcbbpu>(Ve--29LpvPl_`h(J5PkSFG#f@40_p$98U~-RA!ZH)LQ)V76~csL25LmiBtXZbd8q@oZm-6 z^%b>|bsi;@gx4~NjeQNt2JN858ZYk0naC}MK!>ZzuR0da2%r{xvF0(jv*6EZr?v3> z0#DT6u@&7F|McWgrHAEKZ9kk0eQnE$jGPsTbK02-THO%(#UGQlezzpu@kGDU-Vi}& zZ}X5NK(%mHMVKA49qEjtzBUy%bvyUxnf7HKQ_;KD7v-ocYF~iFj)MTi;$QCe8yJwn z|3Iu1X!3cOMMd`!%w3S0&WryyGO-{rfrfzJ9U;ss*#aRlXTWc!(+9U!F;J{?Km2c~ zW7ip*4}T(WXS%;mh331kJ3=>c%5o}%06p8*J&04}oS_o8Knl(-6N0QWaIfe-a4+XP zz(skuIshEl#6Hwy{?wJ_b2(P~iS%_c3?5l9HhOS|iC;^{{Jh~2G%Friw)&|5QwwQs zLMddE0t^pEbYpm3f|M}n&}N&$i+XRUGR7OF4K0aE@zM$gNs2vwMzIzA+b5lX@@;Us zh#j3*=7g(Z2@ZOA2bTb=>dN805aRA=|1jc1lAVI7hL%3M?9xYg^^AfmL&2gEVjrfY z+RZ$??kK9HTzI>Br86dUce+l%pSYXC+<;A?p1kKI_(B)~FGH@*4cFm^KP$D;Th8ox zz$FosNHZa+$b+U+`!@f#(hHF~*BJkg?c`mp|7A7$_E>g#@}GDhcCubPBziNfGH~vl zTiwzCUc8&f8C{^#JC{mn0K9lNsMsbzD8OH~bf_A6S@CJ?SKyN_V>;nSP%8rPM5m_sow`(_5{BE zMaj!P!2Q1;dqF^qe;>aAN5S8j+#-J<+uyI7{y?1fabJRg6Yq-0LxIx&V$tv;fsB9U zIVXA1}c*?e9AMEGal-d9cdq?C=|1&|j7U0Eu+e<4@?tOx?oxq>(Gss>rb5DDl z;Pt-=%Jl+2Bp>BKA~t{P1{S>cqwED5zoVEwpc(An?fEqh?0RqOm;~OuZ$Rw~aOAzs zYYy1-j#z&H^Z)$>YYRZGznelBW@4=M_RpyoKiOCQjY5B0HDa>FA`lq_=cTxh!LRG! zXdAatAie^p1*dj}F`M*RuO~;!h(R9Nlp1<`)Y@b9>n!O8DNlysN!xKo^u(e{N>Uz{ zlTCc3bY;jEfwAMoHe04A|5}l!?E0Eh`dP;1MnkT&mEHZr*dNc#ORq!jL(rt_ule+7 zUT`qSWz-M%TN#P)7XFSP741^%nY3Mvy!uu$!vWOHaP0BHj{mv zc22`1kNTwPoz=NoCr8Pq2{9Aq+JLSUOU*HRpue2m)HA92VN(qGON6nfb+uo-( z!$aw@$uSqFqYqUJ5mj#`C=ZltS0b62)&8>`xICw zM8}OP;msXvEp>gJmkDaI{p>bGiVK~N zmy5%cpb~%|&O`l)QDy}%BlWF$OUL{=RMmPt>(d5HA}W0%VbV(;>dii0tHzWQ*(zt| zXsBw(L}jAs>8u_nsGmZXlzC|llz2d?Ttb!@o@~eBQMms{UXEpM#hA3&!rFdr@ibcK z)F?|mb>!&l_i2>Y3%Icpox;Xr`>F;j2kn@1nMh9LMr|#X)s`z29XKJ3@;&8~%+CtL zkLLZFjOH08G|7}c&wD$&Br`luNju+Lb3ZLcDD!F-WPNjU0nwn0ZH3d&^vCE2YT96l z=j)8k52KD0Sh1iq=4({HKMn!q>&O}*z7fDt} zJ6OXjQp%l#K1h4K^)h4z2B*I=JxW2y`Y!bKEVjn;{2GyB5z;VnEbMnr+V5DBcUH+_c;t@ z4guX>bhpiPnP!dsdXf9LiJ? zVmel?`gM;_LXFs%S{#jisAYY>pEUB;xd=qdXH+WDFhK5hdVJvxQ)^A+#;BnSbTiEA zB>Gk295$ia)^GWWbc*yq)5RkgRE=_5&AD$6#Cf}z=wG8bXfQ=$&%z=HQ(X#^U5lBH zBGMRdqD;Va9A1Pb^(2ldeqkZxi*zuInb*!UG@K706pSsMJ!!OAHYN!M6G{@@4lY4p z+6{zGV}YPI;=0=#t;^%E49~c0Z37o)DslHrgD{~*LL*`$szWH6>1>Mw+w9{5 zf2EbnXzk0hw0A`1sG{-kod~C;{n^wGE@rFurBYQ^D@vq4R6P zpHNWaJ$kFlkfyF>NHhme=#ADO@&kBr%TT) z8w)R54DCICEgx5?4*>N|eY;lDlldyhY zR6$VC&-E@5<_x)# z0}u%!o2Kb+H@j@rOV+0vPu%87q3CDwa|QKFVJG>^24U351wHr*Z;?Zf*zrjvZVT3o z*1#5x)ZnDRW6Iki+|iM{(VMqI^f5Jj$%L!fzRZ0wv><5LBhhtap14i4c4lwA{}`J3sjJZ6|C#=;H4+KnvqX#9D)iU2D{MEvEnEotw(ABUge=O7@0PkBA=*p8Rxt#>W4=H(nOh3jgc}&a zr$?S%qFzIYO;We0tzd|5(ns3f%Qd&aDU>~!tZKh@eg0knzf0topsm5rkLV!0PmjtN zds~MLaI)R_SY9|<$V4|_zL85kngtlPu?`&~D>onW$29lAY-5o-I%6;6<#mx|zGnF6 zQ*qe#FW}=CY{>N!)n7tA8``mzwD>XY>~29ouC7M{ugW&TxjlL@>j(p&KGf17Fu0Kh z!2KDi$M%=nS{E>Z+xD|Vz90Y@jh{G}*SFioh&^GkyFQK8xEfm25uQqwo30%;>{wazr;pmI|M72YZ^7iss?14x|*-vOn zsIAF6p$hUHmSSbo@Drs#{f|w(Z3LnFQGDx$q8-OzXPQ%2J&Q}o=^%NKezG0dMW2i{ z#JQzHATe9~J)MDANy6u);AM5ARbCW%=aL<;7wZ(-qEK%X@x-Ve&Vq!gcmrkOi55MH zvRMZ1Rm4-l7XlXl2RSmIuZUh{EInmPGY!nHw=@}#Ec--y`U=G;jbCQc9-o}iUd zfeW)FJKkA^dL2RCpo3-5D9Uw8Y(X43H{ze)l<8dKleGd_6U@O91LjEWluLYvQJD<& z+wyE=J1QcBKXwfoB%L*JCu@S&!!(`wYN3)oR&!%KbofCN?k%$n+oWia_QRg?+OXW^ z2p)DeLCH{(#c@9ij#PzVC93BJ4e@ZJsAfLE>R93B-YsFSuoF3g4uaN&MS189xB%Qt zF2aM>w#Fz(@{0C{>e>VO9yd}5o~4|`T9rz_3iz%Jch7{3zqGRNyr-%wjD&awA3IQ0AQaC~WlOC+XT8+^P~%ZC|n5Ys{&?`uxQy z@W(uL)Z7^M7Be<7=-zR8U5-%UrEKKwA`W>Z7@{we^Xh6)p4G=DLpb>1PB0DeGW-$N zLifOe;-V%PDB%mCQ+s1gOh^e%y?YTkZ->EZeE$H!;1=w7#>UmW$<+)xTU(^yeu&(0 z+~l}$ERHKCSVLHpLoP$=*|;mCuv-99G<)e>r-SR zZ*{=vaRmxWd|5I=aE}t3@2$KbR*@Zsa_jK$GW!BTydVc6-o3F#d}~O`^B3KOE+se6 z!lTp=cYUpUiN8KizKn#E=B-`REg%(>_I znJgJLy}OV9k*RjgjG1kA5=;ADUj_1s12p0JYh#P9!0b(`;H{l|ENz=L&GCAH(a_oq z)QNVaW?!&)u)0Rv^o`OJY&uuGm5n=}bB(l2(`{J}rO`QvZ?=o-eyBm!U8OL!{IdC# zzJPSPFNgNd+l_GQT7_nD-bLg!Q6z6OwsPG0=e9vga;D2R+piYIu`JSUktfBv)Wo1x zJ9|lF$HXz~mlyodh;|z>CT@JP4wJ?1y>#RpWGJZB*8jU;XQkM_s%7PB2GJbZmY0++ z4asECa>VQQa#=de0Gc0CGsFG{9;CJLGMmK#=q2si;$@fy@;)Vc!X&gw)Z#^$NvF6q zWnu&Q&W@{c7knS1queO^4#@hNP2!=s!KozDVkGAecq7!=R_&bct#QAqDo_wl6Z;u} z%gyp72w}@Arbp!F=Ob?!|vjCvzw7qEb(;r!^=BAL*q{m z-I0SHrS=O>kv+G1+fIzXv9*{_=(A^Aq>^k2!&57CwjiY#d{TFfa?Gb|b5J4SSaV5`cDlTo(9)Q!Z+jR>Wc?=T@3hKy_XQT^tn#P%e~8{13v-&{*Q$+8N~6?O$P%p7ey?en zu40*~Q`N=XO6^rLhCWf!fv&@$_A`1l{5Hw@93QaiaBb3CU-$_H6)~5o{DP8?*u|u` zjiQB^HI?q7+(5sY{R|zWJV4BYxIy4p({BF!7aB19I#sqxZb?c z_piax3qBxxQ~e-eo4FhF=()kiAjdmLGH1|SqUz#*~S&)vTk zeev#cT++7_&C`%o-E3&uiv5{Hsrm|X`Or zlt==1IkXwUNJ^MZNU`C^`3v&IYagDdCBr+%5pvhT-DIlS4fn=Nbaoj=P*Wr%`NpLTZN{)-5UuM`I z=rd<4CAcaa!YroKFlIf(y_mejlQ$wIM&}ki`I&PnY;Rd+PH{>ol3n)6iWA%+aM_JM z?h)-L1qqk0xf$8Shcc1POT(%eU0b;36=U4{G2#<_J4g7F7bxbqG@ZP{p%eM?ROt&( zvjWrvj6YHC^fjk3NKt;E#1K?H@Cz5jG*aI9=*Ss^>yT8{ye#5) zcz!xn6jTiq$-~XBWt11EQaix9Uk3Gc(Wk-hQ4QzZ?E$F;i$Lsqpv=amIR5)S||EJJa)@3+O%ZP zf7Gast2GiG6be3hRWWGC$Zb8xPUnT}@9a4-XDCucpy2FX?jDWKs<0K@aQSd%3VK$425ijF z-#v|AVWnvPoX4Pw8e^WcvW2~5enQ{*jKaj|nSf97%KEJ)PG689`d<7!%)Q8MmcjlY zLg;~U5Aj${G7IKUGJ$xhqL?SMC05Q` zu)QL=MEPkeyn-KN?nPQ1XVF?+yU{0y#N97Oh)r}mv>Wne3tcr|krlZ%ZeJEZ=#%hj z+1h}KTAY3VA*s5!0HvcXAxps=lzAIsN-Qncyj9r*R?K+3aeL+#G+d(gmU5{})h_oO zo!6u{*F`#Tz^YEin-J-fo-UGRYPQuuK5==1I5xFNs;FMi{FnxlE|v6++4b_{v2L+X z4gRElrWh_~bX=QGL@@*_}%?_8!S@S#25v1`90qA1$nAt^p;=}IdEK(h+6U_1-sK-yt=1z<| zV3*%l9GuKPjae-dyMI|DV=?rd-ffD+iIXL%)Mwy?@_tP<)`Vt0rk&rJy7gu1z%TNn z?>?w|A&qb)?Bm*=bk|dEh52PhoK@;rOhY=njA+gY5^hco0UcLmH>;*ZJV${KQeQ~F zPxDDe@sJs0sV9}+Ip^6D#yq0n!pq{mRUcO6Kjh7-|9z9XF7!l^`swJLHwld86g))o z-N<*)qkGs&~pFzBx$ChZ4$4cGtUjV zDBsri1@7QgMjUfL%jGj(^LXE=BVEU)J{0A~-;DpoJdDxA&90`#IHS{KIM*iGX3rb> zlbv~^FDmqY#Foe$GkY%o`s!z2ycXSG2O>#KkrUJ1Bd(B2=zyj6-OO&&pNribM8&x; zWJ61H)>uB&j*|85)b|8x9>I8GcbEoF@-C+s7Jq2`{BD09?UMKvQs7V88A!bMlrRW7 zn59QV{8ReYI8MKMtLj(AudPwXT0V6ctm7xvxela8baMVJp?wbDa6Z2Y%t$Bub*)oJ zeo^0w2~J1!>c%+WbHw8BeL^q=g62&O?zwt`Ns)o85v#?(Dlh{Kj{*#V(U60$I(is8 zIk@er@82c|4#K>Te0X%>h543TC!HAX5Zfmab$c_ZjzXF7ew4UYK>`sOs}ly-ao}xgoLn<2W9ZW4FC6F~NeZd;5|20+; zm6y38=}h=>Yife^{b-!N;a{g~gH5g8f!SGsD^UVSRk+!QNQ=;UlvWbxI)?Kl^1D3} zbaMHaJ=g@z(`r)qR5cRfP1EW_MmqcDrMujmKOocqsssSwqKirM5k zlIMy~CY2JtC3BxiwT5qKLI-Cl4qn%*PHtE)IGw)HR(7r6nIYcbv%?T#mZM3A*AmwGME=^R zNe?Jwpp|bPQk7eLWA3CJ(3xqn&N^Bf)6rZP{%S}7yaJS-^1;I+eHI!bSArdQFJP=t za);AkyiGT9L8Y!du#s3W?qq`e)m|A#`TD*Dg~UpLV~h@#&!8W!uI1AGCu&Y_nYFmu zbsyF_nZ%L=3bu9a(DqRZEL0Sam3X-MJCXQ#;!F?3)Vy`v-c2GI3{((Yq0H;#7Ui?N z41Z-f*m5x$GH;(opeiErEIH2F{-;%FLG<}Gwd|uG+@R5zjX+P=u4Le83NKVgKPrU zP6ha0Uyb`$%;WTZddWA1-OMEv(;UYRZ5YIh*E=u5&BH(Wi8?V&(Hm>3mMq<~aSNA8 z=VJ8Kyo(Q-ri-F7bf$4b`3lPOBrHs4GG~L@o4`+9`jIdTRWYtiiJ$90r#0 zpLer%?MwUV=8rHH!l9VH90MkYROU7DYV{+{8MM0V=Thi;RA2iYp6!OGl02@mlegA9 z8=gc$FMU|>d{-xKm8%KuscUOIS5HU&chVuIPPQR8t)jc3@Ad4{9c*bW90)2(Zn_XG zthq@hy>YF*_aj`wH?Tc2tO@n$3H^$%t@N;)mJmynB~yTHutgSBg(Av6?SWXI&PRME zb`Gb?1k0i3D$#h#H6=4{c5psQe-Fl>(NGA-SRM-@`fKN76lkvvPU5|+OX=q(pPu+t zB!V|TZ9gbte8XgN^hQ|FY*UY3;Wd^c`-_x<^yIUkOy{q2z;xO zEuLzS(G{vGq2H$^pbg_~qCN*q~aHvnIyPSjGv-V5Z&a(b+VW z(YOv#&$r)($M1*1TG%R(Qf+#1KcO4i-%DMqbz@#6zA3C9 zDfYz5xAC^ku4jo$uhEB7-*DFiG0Hma1wG>jub4`(pQh^g-)+(v4gT^j+_jH*GSxY= zF}MSNQU)^q!BFKT6rWv$RNr-k8Fj|2b09fe{`ER3&d_F_qv-CvMg9*|L{G{DVB-ot zxQ%s?LsW@7_ZZ{4*S518(Xoo2$+LIXF1om`ZLygiI3$c!C5nkSrJxuWj>Tua96XIU zV03=UTI&^^6LjxZRHDsn5F=?H^S#32$QXxMBU>~2vWDe?VrU)6R!X=G@i4v?P>(~fasQVi0f=K_age%+Uk}n zj`euBJ45{H(Jbg?Kv$!H=NsYuwH7Ys0ldqM2+Br*zd8@v&Rd5Qq=-qf|mMDAGIpjX!O$6q}|^# z)@@O1*VAvFB-Uy#eXJhLg=t*c9z)Wg7Q96=Q_+qK@nUza0r6;?-9^?8js+P)Omf>x ziGA({dvokT6(N!Z?TV0Y5|O-iLr5!Arv(2zM`s{^vwV0Sf3sfrnYGW|@aeuiKZ?a0 zomAF7iP$d5D_s1sA6!+!kNfwKDI{4tBdmNh!awl`7=#n?2grs;Se+DO(;e}H(HMi!9&`d$cPL*1iSc(^tkU;>gc#{hJbmmQe{uqW@am34bNK_r~$uQZSh!^{oxI?N5q zI|zW?3%onr8oE9==kq%j_OwV$aF9tBF@;O^gzPj{b_RZ^EiN~aFLrY7T7TL_kqe8M zeWG3i%??h%eAcc+LTimjYyHs@UvK4+o@?}}ngB6P5!jHNq;>Hyn+GVHqb~-CqBPAUIBhdT~GWwE^+jieOE2|gqpj|g)VsjJC zsX7OPPKfGwjXPbrI%6p6_zr%4tqbCl38nu~mAWDtP8*(1e)O|C?%fk3q@YkGq@dUZ zxB!2b79}7@Z#OZZm-m^ zK<9&US~f(Q-|o~y9s5CLbIR4Z%ZvYbDesw-xgB~t-JTrsiK#Jl&JI!kuaz&@!l=u` zDrZ>S76u<&<&ZAvrUii9Ojs#FYJQAGVM6(Me*P7bpssYC zT-brwXB5uXKDRrauuGJ>Nuf&w=cf0wD$+j>8)Cc-YeFpSHmf}a|Ecn}w)mIbI z`Q)^j6Fg$26Pfpa)}YHZ!@8KiyL(o-K)28FB&c4*zdU9BzK_mgCc7UNxuZh!W+mI= zlCXKCXOoaA*HGG9LqvD6K7$ zZhkrdJHWCkc0TS^+DDTvefyyD)<6)QWXbZ(secr|+w+%u*a2u+dBu+-`~AeYtDs31 zUvi5G-xtUiT$=N&f7i7Qxb1@B4rq`dz>1;s9LIhppibF(=`HmN|2L zi7`Is;IgrRG-|ErlN|JImfpKEDW_e~p{((MceYWrm7H!br9XEH%0{1%{$P=VK0(Vz zVF`75$Ln?3<3#U*kEgOM@p&CZuDO4~a<4x>TyBX8+ief)fI-NAT9b#(WYzgJK5@I$ zhvtXq*Gqxj^NaDxLA@V3_X$kRS#GrV#3a6SU`5V&d1NP}Kw#Q*+o)fD)VeG*j8E-^U4V)&-?g0Q6p9edxl=y zZHcrp8ig!9uW`BaG}Uh3UZv^WBSqxgv>;h)ID=1j+K@PVo+y=K^h}{o#E%ra3$IT1cBZhnCdn#W!#kreaKlzSc60HN8FIuwVb?#)kLA zPOc_rO?_Df+eWqA4<}&bWP6_%)Gx3sBeLhVX7pNSdm=-S6GKqqi5%UCkL>n%z^4=Y z`ppfsl@uT9&9ErGIoSuZUB}F>8N$YOyzfX^Gh5@;_oF@hWLn)se?^UdO;;*X&)+95 zR4`Y?*!tO`N_M35 z>nTag%_EZ^#iP{y&8Y8eDvcg~NO<@t%_%J2P2j?Fs1`r7i)Z2mku({oZnrb5Y4|*tG7{Tkp0r<>ziNw z>YL=ls+d_R;Th9BGvSZbHqWRO3Ywk;Z|4dqejMB2s;n3K@a0hcmGArDc#pe&c5e&m z21X`EcyF#c8FRjyDCLno%_>+nE##;Y20IEZU+cgTXF3tyA zkp|PjZtH-_pr+bXy3`W&IF1HSbaKj(%kL|15fav!IlP%*&)2aevdax&6CK+5raJR_ z_Gyzh+X=}z_~^YtwwN|i&%nmXW`x`MLg-xu>6KiuM0f9DiR0t-`K8a<&|fD+?YCMj z$pu+r)K7%!#lPv7>r%W8@{C)lz1`T&>%4}y%iZ-N8Xc;r&5srFEGu+YMCFy^r9-Sf zc1H<`co2i*sg8J^k4}w}v@_~Z8dW1d`W%MB;EX zV~G=5>pqRhmXFCGg#lG(>YJ;Q$6k!WufK&iGqdi8`o0vB#YH^7$zF`Fn;m8#73FOb_LI-m14- z{i^g2(l=M@xHg|leafsO(XGTyOp9WK_%hsu}*5rTJAZD zC)^TlymT>Q>~x2Hu3sN>oy5%;#(ikik z_;;S3SPRe&Jy~LZk0KeTx`m&e@pBXRwNPoYH`45#nIFz|IhFPq9Aq+uKNqQ_FE>}| zYzX(h)m*v~5*|ajIPI#~85-WthP|2;5*iW&QG~oAmd`2TjSeOrTCD|fg}E!83mIVU zuBc#!hj92XhT%_*xcZYW98;;xi+bT(c|<3FXW36 zJDQ-NCmAAiy=lvJ;1gth>70$2-J#bH6g*kj7t$?NbOmqpqf3@OR82saB z^fd;+N_5bUn(6@@QhLl6P1;@K7VMGC3>G8HJ~H< zT})^!3ROvIzJpUyPcg79xN?{rM|?oAN@W+P?3D;bbU8z652lBXeHLmNsinW0P7DgERiwKS`H{4i zhl$+i0T$*0v%NpC(=MNxZNy2ku5s{{yZgA)GBb(!LWX;!kgoU4hzAe$+K3)r0Kg^u=ZrvLweU=G+?AL#)_juND3#X>023zQ7sY17w zJv4g_YC1{%RAHP-|CC5EVE37ItK7LiL-odEsIrxM7ah&W>emf39p$N8KWo6U*aUAX zrS9e#wl5oS3Vmzy!hV%t*1kj+ldpB@V3^~%5tn>8H|);RIN zPxeoF{IEC+!-@KCnLaQ#$((C`acX7xJvP{ivIu(L;$;k@Vpi*WiV%rWom)?1o{U^{fvsY+QY)~;PuC`l9*A%^?ZXJR7%p z^?WM1vcS{PRfCa|s^XhfRqiUiDB7bCqqa5>0Buff^Na*J`BHe&4=_CD8Lre{B&Ydm1-e zQV>Ce;ui{BDQf#_PvteZ-$0boVTO|8%gc1ks72qpK7CyY?*YZ1jgt`RlxHuAMI_AC zaQ!e7T(VKx5}ymuL6eiq%edpg%sNzplk+shIsLOz3lzQXR3Sz95F)GQN|(P9Ho`LMv=1_Bp5M6e7s@v- zYrEk@Npk$OK#+S#Kaxq?uW8NXl8zJ;waVO?L=4N_SVUOCW#b^CD_XkjwA^hcwGY!n z>b~Id+w#^|^})fJw1ZUN9i0v;Ei|S!rk_Isy^bb;Tz6kb{sVieE zLv;=&$siP3Hj?oGIutMo_WZx6qkw^xaYN@q&Yz`UB{-mPh@ zA64{=e)RUJbHsT=?`^D>Z^@<-+cJ8<3yo`yI^a!9o1vPv%y{qNIsGD&>0!`4x-Y(2 z7T>~FEgHHrMok(0_E|-KC2v_>H1v5eWfjt&foN{$6#Psa)#S`7YZUJoPk7c(fhn2A z)4R^(kr^CkL9RWYI|(V;o?I0YU}>q9j6sXin#`{X7ijWaD|^+d(j))rLNQB)SqaK% z@T2!$cJi%8E2h`#Nj4w4)OwSO>-TY}T?9@S(>^h4mF!3TTr}(6(~^%>?qQNY<14i@ zEuctBj_pDtFYA9dK*IEVdAIRxYipKRv9#@{vv_Ce0dnR_?;*j}c{3x`!i#*Zn11JQ zf|ti3nqNLQ?7tC(s(s5?G{y3)kVvqC-Wnqtm#ussg5~XL61^ZgAEP@(O{%xY|5AD3 z@szMoX#Nun?Sz|xmNWvAnJUYIBFQr`$lSai8&yTv-_T3G)6K9k;uySbNzN(usXCMf zWsaaty<&U2fxn({*|R45OiT8b+@mT_XRHVcV((`?)tdQv7aO5j75$HLZF|1V_(GG9 z&CW(*4K#+PzAHP7w8_iIyxVkSApWebSu00#LiFTv;iFG9nxU=XbRsoSPA@tyCCg$L z*r4C6jq4PNNmVHwzs!lsWaegRD2vqA3)?~%vr>n%LS;@2~HlvBAfieXs!>FL~>WxgQRyyW^So^Yf&V# zOCE+hDFN6zmvJmnb_@H5l?>8vxoH=~A5f>~IaC`cGAS_d8NC&kh9ni%Y4J@HiWcjX z5m+=8%LHRk^Mdf2>P?M2%hMEy1$1M1&kk=GLn49}f+~U5ebj?9ux#dp~ z#%-@YxHmmI2z_Bsqt4iE-Qh8=^U}XM{h>*NZi-sEvm)-(sKxtP5z46v%r)#dpS=u! z70Ok=a7tZz9%}jFrl@sY{}2@IesI11_mHs04%Pt@gA|U>S?f4{ZuH_50Yoi_Bh%QZ9TKMUG@W7Ar zX0=+E^Q6|RUi_N^b>qP)$+rep%BvjkhcE5Kxe{%qZpq#0u$GQ%46pNX8x@x=o|-rv znYednczP@&DAGHTTE&*3g^iDtZewP;^66MoX!<~WElW4Aj-i`qSCU1CV>I+8!&lBe zdg{V(hU(vYAYYuvllCcPRQMfU>R`eIFYT=q zazivc+-!a+rPJ{MsYmaBXbn_&LpKTM9Yyntk|YY>M~rpOq$(UO1IM@cwxIVHF0!Wk zai;qj3dO87bekKmiK5&`WylksTg#6xVmcZQz#M%svK@BnFu~)xH(TZM;R7WQRzS_ohH&`$$6>9mqh0 zFm*;k=;KqeqD%7{5|8)ea(z(7_)2a<@&oDG0`0l}$|(zLK2yFS>l!+;y#m^xlayN+ zHLk1=^HPgn$TI{MB|&Wza%K;7kxM~BkZ9s6j%AIznmGo@MWCCQ#h3%yX04zC%m!jR z{T!KKRZQcOb{mK^rlaOEKcoZr2_#25m>a^azl;f~&{<}Oc(GP#2SZIDCY9|}#6Ec- zeU2*S;7Cv)<^cz9&f~)N9FPnqHV3}$G9|=|17F)r7P*!yxBU$!6Uj_bdjn{i!}SSB zn0?O+vY6YR52DjwHixLO?O8)wU$zSqpA>@b=qwjuo*M_7ikf{y7QpoxKxU!dqd*Io zLS~`aYe30zZBN8Kfo7mc?mjM}-h-ft?>y!aT~@~R=|*01Yv0E`sY7Y~^mtcv*&p|$ z6(uOAgLDsnkHA36OfXm;qzX1duPqtDu_y6EcVyvJT2=)wN-v8xD2COK&54=QBl84P zf)3f6vDkGQC`e0Pu`-G}%JDT;ghaC2<=C4eI2$tC&+HCyp`cxsJ>-{7iHqMtJ;76@ zu4F9>axQeCF}f%Csb}T}6&=VkIUNgCvSH8K`AH;|mT3w7;=wBT4X}24&@n-eIm2y` zCwl<47JC3_v9RL;EQujSLYor?ymgL=*Fi}liG#yVeRB`LL01Iz3%fYz1-L}Vhxmg| zN{AdgHOrnZlw5nop-LgFlBlny_9ykW?uvp&X~#2Q?Kd0$n{ECM8PleIm1>x2XBc}k z5|+*h&aKit2E$+XK|Jqnv!COhv!7=Po$$+T;T;@dmT9d#tKGkoPyrp7kXEk;4O^#yUX$*mz#S+AQu)_qF`@)AC_Qmf`-)g zCX9LMWj#CR^cAKEiyoO<8^uN?WU0>e zMD$z_S?Dub3K@Eqnf{K*(4|Ml#SXpNJ*)EPjs%2;-wg@TX>Fmn6OtdYhVjdjWczcV zYLY4?N1a&@JkbPCO8_dybQ>D{Ja7wOU@DR$W$? zI%zhlT!)kE1(qDUubrsLEy-siA-BJCLeCTy9!kjz$cG0;2CU|0z z2YcAm#d%Xey=sTNcj@iyZS8IAUE;F^;xm-lo&6a+8C6m_9Icc1j=|^}dkLXupP;

)y6Be_{kOe3D`%crBk7qTvAV{fXeXcA zeTOzr*(+nLr&pH^gkq=HfA$ghnehY4YwkJ|-+89^vUL3mWO>yZq9IQi;H)R+q{(8Q z>60#uft>xxg!QOV33oj-M@`f~RB7gpu zm7>{_LpXS;F`ZYdG4^h3t}^-+>eyhf8Epy}?dc>NO({*6@z*x=3I`jNra$1feq?r@ z@v~6hTIeauTjG%zc`ZzjuON_AUo_Nl*pjZewtf7qqO<+j{)}p64)^|f?E9Xo_ven? zt8Y49y!O#XQ=Y2V((;=OdgRH6Q-G|{o|x(tK+ zUa<^C53IR@5}NT=*NUy_#(V=8Wo~n=2IR?!)Wk>vU8tS>orzp54#$lrm~)EIZ148E zt(^X2j*z4RL0G=sIGb!wLMj&7j;Q7; z<`-X!x7CmI&BkNgQcE4{@K&~c0DJMCCh5Hn2GvH`YNMt%UC^eieJ}N$Fa_42S1(MB z4+^*wFJO88JfcHEKSivi6~j~yXuk?YulS~tuLf`qz9pgd!0f;YH#E4E8imT8m_Mbk zO!U$to(;{VZ5k!aX6du*Ali;*JK8^NX8LN^o3lavh&-bIVW*}QaTeDRh4P3w`H^j& zQ&}>>19SG07x^`M!U{*Rcf04H0!nV>oQip0M{e!m5LYWrd}%BUH-QoC-M5T2VDCW} zeoEJMuF9vxIj(?_lm0@=@kJN(S=BD}$P-PecOy|t+VLk;XP&F?ZC2;72%+OQOp@sJ zHo{q>#@+a-aH%a#JC$i9TLZvD?|TFk%-AGb-N5Q!EVq>vaQzq8de3}iQZmupezYL((_DP_Pu+bB@uqTKx)Cue_`>^1(`#%s=mOd%-;twr zi*P;at4nr)g1nfYk&j-QFv(5ozIE+@#;bQ95G8i$J8G&eN;MeOJ8xCP&u(5M~)?<`5r^r0{6YVoBN^{jxiprWLBB58{fFd1(%U->rY|77ghXDlAn{e z6V*OmHDjBK#NI{LaxAsL#3~t3F;J5ecYm2zRmNxj!YwWE*Ii_HNHtW~r`v9=xJS{C z`Q6%mm;H}kqbmwe9JL?giV`2S)R1;RdslRj8ED`wl+Kee$SEbh)Z2)Lxm_US&Eu>W zSKB6vv_D@KyLe=|66LypMem-nb}?+e%0hdr?v_nx z^rqP6jbz_PZ1S0CFXLUHA-<2MY(G6mq7hB({V-m_ns-;d)H?@+rb^S8Yhp_5D(DmK z*i<-!wY5VlCOZ7J+TSta-AZ+Nz2%97S9t1X^?rW)i>O&!7Wx^3TM9yVy@bR=QIn+6 z9Bm%V=g(CsU?g%U+3GsAZ)V#3`goUYlgwQ#>yAF0D*XW0jcm`EOaqx_Oa%|7c}orSfb6183|w_Ervd)$BNdG45> zE?Jl)rg8I$(Lmytn^E4VCA{6gM9YgGmtuIIb#$|1TDa_EGZnIj*@~a`~+cZ8COeZTx`py`5cFJ4|GXem>$ z2lnYQPK8O#v6Ygi6VF={o^@jw#LWaGLZyn`0!}X--dwbV?{u?DacxenO*8i@-dR_c za`>!%pX0$`ck)cxe209s?YUZ9x7CYnZ}RPQCNb~Cg4xO9GpF)Cm(QfPw0a%$Ysjn# zY}}5lE4lXr!v~b}2cVuA!M}ch1aAcSVW3=;F#8Sie;Yw-;8mVQnJ;}?dmr1LkIf0j zkr9(ED^@RMVRG)(y3XG{FyZwFx(-@nLDL!#x%^fenM~!dSqQ>7&X{GHLooDTQ ze8TMYonuRBsOsH1rb=Ypx;q4KY*^Wo$czl5+hlSUk7B#O;3y1QLA&4TUe+?5)ArzU zc~7W9t@fzpb47faXIjKQ`R}Xm|g5}`ot&~RMPZX`vH#x zHRDmtkBp1N1V`ac3%0kav@kMo;639g?jvhqYd+cmvhf3DXT9fv!^t4JE?10T<$St+ zYj4h#1%e(>sRr%=l+OCOmj>e#7W2y5XivO6b@S5oc^s)Un9f_fL{@hi8YMtTQD8W^xHj3|Po6 z@f)6+W|QkToRgnTv9)t>KQuhxJtO^9@^+T_)0U?FjHVqgw;{CS5y!LYb+zFB!H&)-sSZjA9R&J=JL9uCZYVwRn`3;8rd~Ca@nE4Y^+^rX3Xoa(QapCG!-)P7? z=xA))d@}ZzjXUd*dVl@WxM4N#^7MXII#+XS$T7uw=eD6k4VlYR7QA;B0Y2|Fr}@pO zHdfgV9ehLRwFr0-lCkVa&b61bhM85|EOUP{?6;RS;qrTNd~U zVux&17MS;HJM5<{Fd1Sa?L`i_8S&Jd2W~|?1r~zU5KlwJU zuLSQPo+w{~4-qfaNV=+7X#ki9YhzJ+)q9|n6MG(Q^y?;$Kl(LFg-0up-x zmWy0jUf(nUwnF?SN^-bX+X=*V3;5nbxaB$nMunMAgU7ETDm(-JecVF!EO-!63|tn$ z>j)DKm%*WkLok-t!8Z_^QMSMz5ZsY%Fge1mt#4o*#HkuZ4A#iKK*>l%LAZk70wIE3 zwm=B#pZ)-kA&!ACH%(xzz(+!oVu6{RfXHDta=|3^X$Rm|{4100Lm&xP3ShgakcBJK zFd77c&^e6`IY;Q6!h+Z!?7f2ziTiC40p$0YA%$d+rwE!51tcEv)JF-?Mm*6`L!1#( z;%Olh2%7B;2qEs3wg7HOF@np&2N^>gig7LknL|Y4qaAe10#5S*H*2O@)&0h}9fPL;>( zawSmm9s!kG@UIMDn|ZJo3V>#WovA=>AY{;~La2~Y0U|eyP76YaAVT3pbbxplHmCYW z+ZLRNk@yai6~?3Xhj>Q~AY#I*)gVm3LF5p^EA_$sF4}#7S^>aau50Qgu;Ea#E7VrY z*C+}Au?&3quAww>u3UPH3$!^Tmz8QO?bueBPN7P`dupC z!a0n@iPefXIR9HH{$(K&)8BvKxoAoZLpFl2z!WthSXZ90EQqaY08NA>a2Cp)zfgE) zDTtlHy!!)%Z z$_U4Dw18rJ0n^ZeuqG~xp}{tR|F6=2dODmK3#5milxKV`y$&-xJK3$iqqv zTd@3{MIU>83nYLo9vQ%^kMh^j8L?hVhj2#sS8eZ2z!`MFcY|y2GoF9o8afb>E2|`q znyg;}R?!1i$zFpe^IwA#Cc3Jl!k`Aft#Y_O#j6OY$OLMv$hCB7z?E!&yt$_fA-_^1 z#00&q4X9B8ER(thzkT};oEmm7=eP82-d9;Qz!89^z)Kw0b7V{O51bUnsRtptlAbKr za)AU$Rsamrx+eWn3MfY+Fqj^M^$N+AIHwKI>Np^)HLf9@Wv`HA`o9YW9WjiG00_!M zzzp$gNInGsDR~tNM3$H810^;?DUnb84h+=*LVTr(Pw&Zlcs@)3HpyH|AEp8z<^Bn! z`Pp#Y3ZO|I$mHu;fTa!~1z?K?5ITe=WJ5rcnl_-H875;0VMb7$;M9Iycm?1}L^Gs= zX_)_xqgK%_N}w9TyG3k4B+rP2vcXfXbpjYyaKAqbyH)^VWUP60a&6Dg!T&R zcG1*UL5PGD#Q9%a; zi>m~%`uh-;D+A6vaJjx?AR%Q@A|Z+W6%DV}Y=LmmiS@x0FiFQh4XLs*5E}Q0D~?72 z5iHC2clO|voRP=_8e9HHBYpH=G{yr6F(SnD4*)qGvA;PKu(OAMG(^KWsF}Yxn2A}r z`2XXczQ{8-FQD|_19JJ_{aAjmKn{@bCQQ%-!h%p_W&((Vy!=BYf=#Oa={`QdS;>U} z>lTdC^tT_0BkOM#fbNhE=n<~h!=d6UqO@-uEiLq89;Kv+RY%CFkLeU-Bp&H@L+*@0Kt}k;Op?s)<1BzD4tV^n|B6*?{<|sZxlJ4b0V*X7xF_Oz#Uve1 zT$z!C(OUe;8*U3Az2_`IbR}~4hjPfUXSr3*r)|i=c?9mVZMLb>sajMuBRkONXS5c3LgUf`Q!(``ULj`K%*vp z`-*zi!Xq+$#}cqJ`V`$m#j@thSPDG3!8Yy}a5aasQkS!F7o_7}kTDWKv28;7~zWn)RQUS6KrR zZsEeoOfcXVuNzm!OpjnIp#u$xB@r;L{+k|d3@-j(C^pQ@=67K$w>=dJ2Qujn9ugA& zHM|`;0^d)Ij1MdJ|D7Re;gXERK!!X4O2_?ccu6`0-bDZzXz^_!Jcy8)+5%?wGa`7| zw!d4lAlRy_3Q%gl0#mu`ViZ{ZLMdV5b`TDP7zaB*3^vza75x9yf?)6JpS^&~PC#M3 zUSG0#{^Cil>gQF6dh6HpSb$n{3AiD1&4msT1U|(c!gU30wUT?-1r#V9AW!aIgDQz3 zpnFotwEq;TMd72!dLZxefMRvM{cx1{izkIWefT^2;z()l-T~2n3KVbAYx1EAfAOd= z4mD)Lf3;*-4nQnY)DQq=2M7ZqM33O)c5MWC$QT(L7H5Qv_K$J5ri!7vfX}=@eN?%o z3|sGSBKCi^$!d;(K*c?DsC z-3UO&fVo-z9&x<O?W zdTaaS<=;e@u*Yr?QiQ-XH$b3L{=aw{rO4>8XC8mTcL@iOl>G%TUPa*Ab@YEr1-wZs zsr(DYgEhGSX&Hyy0WoZK2p+!2@0Rh}ZS~j0ysbw-y*wZ{uQdL>yB=trt~bN3O$exC z2QtaOW6ovQQ?Lc}0cJqOzdqwkXh)=I?zx^z=?)JyJ^|5t07OsfnmNP0h!o;ufR1+0 z-=*ai|54>Sfc*iKh3nHKg8sjFs;gjL`Jhle)58H^e^C9mn(7Y2%)KB|S15^M<0yB) ziq}AUc)j@_oq?m+lb3-k|2y1QzI{R61wg|9SFf)aBz;5}(YAt21P{#bIiiC%kQL<% z@Opj&2HZ}K4y%OouBPv8aNhTS^Ju>ST?d5^1pBHtpLQkFC1eX^Cg21T!{&>pa2k`d(&2#jFAPVeZBg4;&fU7+JlMOS+zZ3cYF3SJh=KbG^1=iVL`9%Np?@Q{v{cJ$c zw*c|i2W{@iFoF;W6%4o!91E6(4lIvxMg49Gwqs^bwEo{jM<7I^t1G>4V1g)M0YLMC z$b{GcASy4x*8xPZ<$yo!#ZNfkHrjOn4$Ku7gbGs&{3FIB5I_y!{0jwK9gYD5E*eLN zRsBV^!%^Sy{)@s4g3w=8gnzCe|8GT5CAgl53Ss@TAV6RtErPdv6U6*4lg{D1bb16Y z2MR11e`N&`!u->J#|uUtlOYSV{GWh9)b%bD%=9XZKC?G49?JpS%w#W-2y;62% zc=`?*9eh9!AH-d6?H_X?5?#f<>lUy;7X26q+Er=2TDuYBg9GR}{?n(v9i;fFxDU=2_H3;~AWDas@f zDab{13EO1p&o~_+#>o2q02~F#2#HD{TK#g8(XMKm!4O z|Cc-uC&y{Q$?PzONT6T7Ds)%N$Pfb%{N;=PS@(rKjD%21vi#<{SUj}0^zgQ}_#fO~ z-+>2UedG5BK1S3wfPIgK5MP_r!+-aX%czoW1#mTht89M~AOZg%Jwc>^IYj}}g01cV<0sAu%oq|CyQqadU(99t0XRS$9{WCbqd z%mN{V`ymJ)_SOV~xUP{e8ZO@k&Z2;|JArTzD@<@DZ}y!x@o12cyze3*FE8zHbfw>U~fyObv1rFT( zv78Si-vGYU*K}*9!lA^l;ur|&m2>Z601tT5V8OBf*VdH)MRlZMVCHeDTyn?}89Wgq zqDI{)@j{Y#yJ|KPiFmA~M5D$v<&~&0YJvyg5n2!w6chwRJP;UIP`ob`bqR`ul;vt! zAnuwN&$!>;-OPIrcdDk;{=WXteto@mTJc~{He&Wp77ecoJop6$UvKnkdR@Ld1$EpW z#DVXZ5(h z&ds**Ml+*`_ik%x9rq(97P_ozg&D=MiZ>T9#CL*S zE#mcwXUm}faZgZ#8f_e1)MbyHZ3M*O32XBoLBZvlj%-St;>bl#3UjDfjpcAOM5^64 zg*9B|+q2crY}%^ss`q({{~jegrb;d^EK2wdp8?z(Q#`ymW5)uv;;rCACxaJT7%zjm zilD4k0>bq&ZhW4=Yt;}G=q#Yoo(50VfS=F*HuD8ml}dN*_)7-N>?HULwF>S0`263 zq(nyx_i=B5iFHqqkr|KxQ4VtjWLjkKWwFgr3PELy1oW>2h2KZZo>OV#Vo_@R2D)i^ zz&#k$@b}LWps5I7b*GDTH!czUc&2pW9uzT1xw%x7Hmri$Hanz6emh9PatqX!&DqgB zUaj1Lhpgxu!rYnvj|N9ph-zH9!#{q^x`F_&#|&JZuaNR5D!(mjl_)#cV`GLrTMhUn z+Mv45BEHLa7WpPtnJ48xkFefv4fzw_l3%|@@S9=`{?wOR79c5^ur_49K!_%{2C(H}quE%{jR7a4BX(0Dx z#g|+D>2%+izd=<$sL~JX<|GZjG7S&MofH{~7}U0zoUFld7lnz-Q*WVY`9##DX!y97 z!k@nt=9Sj^Z<#(FBTh2*)bzuIdz!wIJqyW1r?R2hlEf5PQn*h;s!WmbXdtL_rUrUH z&)~|P;7W5AnE4EN74oL{{7ROl($ZZ@dmhBZU65za)ehp)+6 zmyGz%fEInr7?7vok1asU*t}cDH+eVY`R8l!K7|+@_{xXc$I16SZW$pk63tEj+_)Uk zR0>YT%RS=9-`_Zu-i0`!PFG!fnpvRXZ%$SGxIa0m&>vB#fts)K@Zs_*pAK3~vnF~C zq7PkYvA&WU8SXp7qSANKa9#=Jd++5#8%0#+b1sg@t4&UK}5p0sPj3mUwK2eAu!HfBjg4zxl(^N^AvD2J3u2>NW&6BqD?Q7W?^2 zO{FLsBc9OaDTnS?kY&X1q6vR%cx9>7fhA{1&wDSnucVYs#GF7M;|wDU*oXN*lOt}2 zq1R&Eset`3It0)`Mi&;fRa#ly=pH;5K756*zDEdj75p`sikDdH@zJQ(m2JsZyq(qQD?j4L ze$9mWu5KJ;j}Iue?42w*h5t4_J%2UYz}MIm*N^JgRlYs*^EP^j{U>sKjz{W!+7DZd z(Nf=VGQ2HH?AarKqnA1veU$|xs{J&$siRRdS)FnK_`a?hyr~E91sI}v4vGoS4Yc50 zd8tn}O2_vWrQ@@e03K=gVf_g%?W0WYJhf*#;_d_EPE@mOqN1^6&lC!f(U6e=0k=5xLO1 zAzDDQ=NrZRo%@|d(8~D)dDB5k(S0xc@MOd*WOE*7_Ike)7FnpaZ1_Q?1D6tg5K`_g z6A+Geigv9Y>TDU8_tsgY&@oRj+y&%8$nWb_ghl4bsq9LEI&afJIPNJ_*AX-Vgk`v*0nQ@<}l4kD2Mu;vQ9FX)p*HQr-q z56L0-TJFQZNVMY%cp$@d)G^6|IwZ~5mW|4nP4KgP*xY)zfZpsiI2ia zGZ1F#b+0J2@D&zaim)^tlOg|;vA)u<$0y1Af=R^=sLCISDM>80- z8F3BMC92ms!S}gr^kt!?l7Bw&lU}~y?#08ZzPP45%()==K}GUG8CL|Yzg!lj=cJ9EvV1Ua=fyyGJJOx zjc6pM2l~}^$X!8=D3N$cg4f#>Cn>lE_Q-y*BUF%`Q=jnx8(>$9P3rCbYu%% z6rZ=c-VKK%ZDNp^S*=>`YMI&yft^msj_BR0DXT(}izoQ%rw~uw1hv`=o4l4-)Q{+U zYB0DSNT!~@9tT`0cC)>~e|bXM>Ua_!dwUA#g^%K|ikfvJ)8Gy1OM^w7xT~Ru=7XnWkV!XoWQ-DVI7Es6j*aWuhJ~@_G4~|3r9KBj@00a#X{9C8w b-p*^1Vm(}x{`@hdd~{lIv8@S5#;pDa6B7MQ diff --git a/settings/repository/com.google.code.cofoja/cofoja-1.0-20110609.xml b/settings/repository/com.google.code.cofoja/cofoja-1.0-r139.xml similarity index 57% rename from settings/repository/com.google.code.cofoja/cofoja-1.0-20110609.xml rename to settings/repository/com.google.code.cofoja/cofoja-1.0-r139.xml index 38d4e88f1..202d3d0a3 100644 --- a/settings/repository/com.google.code.cofoja/cofoja-1.0-20110609.xml +++ b/settings/repository/com.google.code.cofoja/cofoja-1.0-r139.xml @@ -1,3 +1,3 @@ - + From d56a4631dcc483580603256e7de8b6c72f662507 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Thu, 9 Aug 2012 17:08:43 -0400 Subject: [PATCH 097/176] Update cofoja version in build.xml --- build.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.xml b/build.xml index ca5d22a5a..135b9dac7 100644 --- a/build.xml +++ b/build.xml @@ -92,7 +92,7 @@ - + From d7d7ccf7898324b6218a0efe0d01af892864d2fd Mon Sep 17 00:00:00 2001 From: David Roazen Date: Thu, 9 Aug 2012 17:10:47 -0400 Subject: [PATCH 098/176] Revert unintentional license change --- licensing/GATK1_LICENSE | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/licensing/GATK1_LICENSE b/licensing/GATK1_LICENSE index 080ea1e68..648ec8fc3 100644 --- a/licensing/GATK1_LICENSE +++ b/licensing/GATK1_LICENSE @@ -1,6 +1,4 @@ -Copyright (c) 2013 The Broad Institute - -Extra text +Copyright (c) 2012 The Broad Institute Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation From eca9613356d2d53198671637ef9e1b18e30c0844 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 10 Aug 2012 14:54:07 -0400 Subject: [PATCH 102/176] Adding support of X and = CIGAR operators to the GATK --- .../haplotypecaller/GenotypingEngine.java | 2 + .../gatk/iterators/LocusIteratorByState.java | 2 + .../gatk/walkers/indels/IndelRealigner.java | 8 +- .../walkers/indels/SomaticIndelDetector.java | 8 +- .../broadinstitute/sting/utils/baq/BAQ.java | 2 +- .../sting/utils/sam/AlignmentUtils.java | 172 +++--------------- .../reads/TheoreticalMinimaBenchmark.java | 2 +- .../LocusIteratorByStateUnitTest.java | 40 ++++ 8 files changed, 83 insertions(+), 153 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index 52c13d124..6afdc58ea 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -568,6 +568,8 @@ public class GenotypingEngine { refPos += elementLength; break; case M: + case EQ: + case X: int numSinceMismatch = -1; int stopOfMismatch = -1; int startOfMismatch = -1; diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index 7d035d208..1606c227d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -195,6 +195,8 @@ public class LocusIteratorByState extends LocusIterator { done = true; break; case M: + case EQ: + case X: readOffset++; genomeOffset++; done = true; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 5e0f15e6a..d61b9e9b6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -1025,7 +1025,9 @@ public class IndelRealigner extends ReadWalker { elements.add(ce); break; case M: - altIdx += elementLength; + case EQ: + case X: + altIdx += elementLength; case N: if ( reference.length < refIdx + elementLength ) ok_flag = false; @@ -1287,6 +1289,8 @@ public class IndelRealigner extends ReadWalker { int elementLength = ce.getLength(); switch ( ce.getOperator() ) { case M: + case EQ: + case X: for (int k = 0 ; k < elementLength ; k++, refIdx++, altIdx++ ) { if ( refIdx >= reference.length ) break; @@ -1432,6 +1436,8 @@ public class IndelRealigner extends ReadWalker { fromIndex += elementLength; break; case M: + case EQ: + case X: case I: System.arraycopy(actualReadBases, fromIndex, readBases, toIndex, elementLength); System.arraycopy(actualBaseQuals, fromIndex, baseQuals, toIndex, elementLength); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java index ba16fd709..b0c09f78e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java @@ -2057,7 +2057,9 @@ public class SomaticIndelDetector extends ReadWalker { break; // do not count gaps or clipped bases case I: case M: - readLength += cel.getLength(); + case EQ: + case X: + readLength += cel.getLength(); break; // advance along the gapless block in the alignment default : throw new IllegalArgumentException("Unexpected operator in cigar string: "+cel.getOperator()); @@ -2094,7 +2096,9 @@ public class SomaticIndelDetector extends ReadWalker { break; case M: - for ( int k = 0; k < ce.getLength(); k++, posOnRef++, posOnRead++ ) { + case EQ: + case X: + for ( int k = 0; k < ce.getLength(); k++, posOnRef++, posOnRead++ ) { if ( readBases[posOnRead] != ref[posOnRef] ) { // mismatch! mms++; mismatch_flags[posOnRef] = 1; diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java index 186452294..439a0d8ed 100644 --- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java +++ b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java @@ -551,7 +551,7 @@ public class BAQ { switch (elt.getOperator()) { case N: return null; // cannot handle these case H : case P : case D: break; // ignore pads, hard clips, and deletions - case I : case S: case M: + case I : case S: case M: case EQ: case X: int prev = readI; readI += elt.getLength(); if ( includeClippedBases || elt.getOperator() != CigarOperator.S) { diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java index 2c388a1e0..4f1e66ba2 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java @@ -39,7 +39,6 @@ import org.broadinstitute.sting.utils.recalibration.EventType; import java.util.ArrayList; import java.util.Arrays; -import java.util.BitSet; public class AlignmentUtils { @@ -71,9 +70,18 @@ public class AlignmentUtils { if (readIdx > endOnRead) break; CigarElement ce = c.getCigarElement(i); + final int elementLength = ce.getLength(); switch (ce.getOperator()) { + case X: + mc.numMismatches += elementLength; + for (int j = 0; j < elementLength; j++) + mc.mismatchQualities += r.getBaseQualities()[readIdx+j]; + case EQ: + refIndex += elementLength; + readIdx += elementLength; + break; case M: - for (int j = 0; j < ce.getLength(); j++, refIndex++, readIdx++) { + for (int j = 0; j < elementLength; j++, refIndex++, readIdx++) { if (refIndex >= refSeq.length) continue; if (readIdx < startOnRead) continue; @@ -92,11 +100,11 @@ public class AlignmentUtils { break; case I: case S: - readIdx += ce.getLength(); + readIdx += elementLength; break; case D: case N: - refIndex += ce.getLength(); + refIndex += elementLength; break; case H: case P: @@ -164,6 +172,8 @@ public class AlignmentUtils { CigarElement ce = c.getCigarElement(i); int cigarElementLength = ce.getLength(); switch (ce.getOperator()) { + case EQ: + case X: case M: for (int j = 0; j < cigarElementLength; j++, readIndex++, currentPos++) { // are we past the ref window? @@ -204,111 +214,6 @@ public class AlignmentUtils { return sum; } - /** - * Returns the number of mismatches in the pileup element within the given reference context. - * - * @param read the SAMRecord - * @param ref the reference context - * @param maxMismatches the maximum number of surrounding mismatches we tolerate to consider a base good - * @param windowSize window size (on each side) to test - * @return a bitset representing which bases are good - */ - public static BitSet mismatchesInRefWindow(SAMRecord read, ReferenceContext ref, int maxMismatches, int windowSize) { - // first determine the positions with mismatches - int readLength = read.getReadLength(); - BitSet mismatches = new BitSet(readLength); - - // it's possible we aren't starting at the beginning of a read, - // and we don't need to look at any of the previous context outside our window - // (although we do need future context) - int readStartPos = Math.max(read.getAlignmentStart(), ref.getLocus().getStart() - windowSize); - int currentReadPos = read.getAlignmentStart(); - - byte[] refBases = ref.getBases(); - int refIndex = readStartPos - ref.getWindow().getStart(); - if (refIndex < 0) { - throw new IllegalStateException("When calculating mismatches, we somehow don't have enough previous reference context for read " + read.getReadName() + " at position " + ref.getLocus()); - } - - byte[] readBases = read.getReadBases(); - int readIndex = 0; - - Cigar c = read.getCigar(); - - for (int i = 0; i < c.numCigarElements(); i++) { - CigarElement ce = c.getCigarElement(i); - int cigarElementLength = ce.getLength(); - switch (ce.getOperator()) { - case M: - for (int j = 0; j < cigarElementLength; j++, readIndex++) { - // skip over unwanted bases - if (currentReadPos++ < readStartPos) - continue; - - // this is possible if reads extend beyond the contig end - if (refIndex >= refBases.length) - break; - - byte refChr = refBases[refIndex]; - byte readChr = readBases[readIndex]; - if (readChr != refChr) - mismatches.set(readIndex); - - refIndex++; - } - break; - case I: - case S: - readIndex += cigarElementLength; - break; - case D: - case N: - if (currentReadPos >= readStartPos) - refIndex += cigarElementLength; - currentReadPos += cigarElementLength; - break; - case H: - case P: - break; - } - } - - // all bits are set to false by default - BitSet result = new BitSet(readLength); - - int currentPos = 0, leftPos = 0, rightPos; - int mismatchCount = 0; - - // calculate how many mismatches exist in the windows to the left/right - for (rightPos = 1; rightPos <= windowSize && rightPos < readLength; rightPos++) { - if (mismatches.get(rightPos)) - mismatchCount++; - } - if (mismatchCount <= maxMismatches) - result.set(currentPos); - - // now, traverse over the read positions - while (currentPos < readLength) { - // add a new rightmost position - if (rightPos < readLength && mismatches.get(rightPos++)) - mismatchCount++; - // re-penalize the previous position - if (mismatches.get(currentPos++)) - mismatchCount++; - // don't penalize the current position - if (mismatches.get(currentPos)) - mismatchCount--; - // subtract the leftmost position - if (leftPos < currentPos - windowSize && mismatches.get(leftPos++)) - mismatchCount--; - - if (mismatchCount <= maxMismatches) - result.set(currentPos); - } - - return result; - } - /** * Returns number of alignment blocks (continuous stretches of aligned bases) in the specified alignment. * This method follows closely the SAMRecord::getAlignmentBlocks() implemented in samtools library, but @@ -367,45 +272,6 @@ public class AlignmentUtils { return n; } - public static byte[] alignmentToByteArray(final Cigar cigar, final byte[] read, final byte[] ref) { - - final byte[] alignment = new byte[read.length]; - int refPos = 0; - int alignPos = 0; - - for (int iii = 0; iii < cigar.numCigarElements(); iii++) { - - final CigarElement ce = cigar.getCigarElement(iii); - final int elementLength = ce.getLength(); - - switch (ce.getOperator()) { - case I: - case S: - for (int jjj = 0; jjj < elementLength; jjj++) { - alignment[alignPos++] = '+'; - } - break; - case D: - case N: - refPos += elementLength; - break; - case M: - for (int jjj = 0; jjj < elementLength; jjj++) { - alignment[alignPos] = ref[refPos]; - alignPos++; - refPos++; - } - break; - case H: - case P: - break; - default: - throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator()); - } - } - return alignment; - } - public static int calcNumHighQualitySoftClips( final GATKSAMRecord read, final byte qualThreshold ) { int numHQSoftClips = 0; @@ -426,6 +292,8 @@ public class AlignmentUtils { break; case M: case I: + case EQ: + case X: alignPos += elementLength; break; case H: @@ -488,6 +356,8 @@ public class AlignmentUtils { } break; case M: + case EQ: + case X: if (pos + elementLength - 1 >= pileupOffset) { return alignmentPos + (pileupOffset - pos); } else { @@ -519,6 +389,8 @@ public class AlignmentUtils { case D: case N: case M: + case EQ: + case X: alignmentLength += elementLength; break; case I: @@ -565,6 +437,8 @@ public class AlignmentUtils { } break; case M: + case EQ: + case X: for (int jjj = 0; jjj < elementLength; jjj++) { alignment[alignPos] = read[readPos]; alignPos++; @@ -798,6 +672,8 @@ public class AlignmentUtils { switch (ce.getOperator()) { case M: + case EQ: + case X: readIndex += length; refIndex += length; totalRefBases += length; diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/TheoreticalMinimaBenchmark.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/TheoreticalMinimaBenchmark.java index 8e67c9efc..1abca5487 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/TheoreticalMinimaBenchmark.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/TheoreticalMinimaBenchmark.java @@ -94,7 +94,7 @@ public class TheoreticalMinimaBenchmark extends ReadProcessingBenchmark { int elementSize = cigarElement.getLength(); while(elementSize > 0) { switch(cigarElement.getOperator()) { - case M: matchMismatches++; break; + case M: case EQ: case X: matchMismatches++; break; case I: insertions++; break; case D: deletions++; break; default: others++; break; diff --git a/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java index 218548b00..dc908c323 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java @@ -41,6 +41,46 @@ public class LocusIteratorByStateUnitTest extends BaseTest { return new LocusIteratorByState(new FakeCloseableIterator(reads.iterator()), readAttributes, genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups()); } + @Test + public void testXandEQOperators() { + final byte[] bases1 = new byte[] {'A','A','A','A','A','A','A','A','A','A'}; + final byte[] bases2 = new byte[] {'A','A','A','C','A','A','A','A','A','C'}; + + // create a test version of the Reads object + ReadProperties readAttributes = createTestReadProperties(); + + SAMRecord r1 = ArtificialSAMUtils.createArtificialRead(header,"r1",0,1,10); + r1.setReadBases(bases1); + r1.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20}); + r1.setCigarString("10M"); + + SAMRecord r2 = ArtificialSAMUtils.createArtificialRead(header,"r2",0,1,10); + r2.setReadBases(bases2); + r2.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20}); + r2.setCigarString("3=1X5=1X"); + + SAMRecord r3 = ArtificialSAMUtils.createArtificialRead(header,"r3",0,1,10); + r3.setReadBases(bases2); + r3.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20}); + r3.setCigarString("3=1X5M1X"); + + SAMRecord r4 = ArtificialSAMUtils.createArtificialRead(header,"r4",0,1,10); + r4.setReadBases(bases2); + r4.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20}); + r4.setCigarString("10M"); + + List reads = Arrays.asList(r1, r2, r3, r4); + + // create the iterator by state with the fake reads and fake records + li = makeLTBS(reads,readAttributes); + + while (li.hasNext()) { + AlignmentContext context = li.next(); + ReadBackedPileup pileup = context.getBasePileup(); + Assert.assertEquals(pileup.depthOfCoverage(), 4); + } + } + @Test public void testIndelsInRegularPileup() { final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'}; From 40f0320a1c44248ed595e3db341204ee5576e478 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 10 Aug 2012 14:58:29 -0400 Subject: [PATCH 103/176] When adding a unit test to LIBS for X and = CIGAR operators, I uncovered a bug with the implementation of the ReadBackedPileup.depthOfCoverage() method. --- .../sting/utils/pileup/AbstractReadBackedPileup.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java index e71cd01be..3d986f666 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java @@ -155,7 +155,7 @@ public abstract class AbstractReadBackedPileup pileup) { size += pileup.getNumberOfElements(); - abstractSize += pileup.depthOfCoverage(); + abstractSize = pileup.depthOfCoverage() + (abstractSize == -1 ? 0 : abstractSize); nDeletions += pileup.getNumberOfDeletions(); nMQ0Reads += pileup.getNumberOfMappingQualityZeroReads(); } From 4968daf0a5b46cecb83838387ab3a6eedc431bf8 Mon Sep 17 00:00:00 2001 From: Ami Levy Moonshine Date: Fri, 10 Aug 2012 16:58:05 -0400 Subject: [PATCH 104/176] update integration tests at CombineVariantsIntegrationTest --- .../variantutils/CombineVariantsIntegrationTest.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index 3b60fa2c2..c8551657a 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -142,10 +142,10 @@ public class CombineVariantsIntegrationTest extends WalkerTest { cvExecuteTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec); } - @Test public void complexTestFull() { combineComplexSites("", "151a4970367dd3e73ba3e7f3c2f874f6"); } - @Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "c0625e092b878b3d3eb1703c48e216b7"); } - @Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "6978329d6a1033ac16f83b49072c679b"); } - @Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "6978329d6a1033ac16f83b49072c679b"); } + @Test public void complexTestFull() { combineComplexSites("", "9d989053826ffe5bef7c4e05ac51bcca"); } + @Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "4f38d9fd30a7ae83e2a7dec265a28772"); } + @Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "46bbbbb8fc9ae6467a4f8fe35b8d7d14"); } + @Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "46bbbbb8fc9ae6467a4f8fe35b8d7d14"); } @Test public void combineDBSNPDuplicateSites() { From 458bbdee8fea389ab7e3f554bd7759d37957cc0e Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sun, 12 Aug 2012 10:27:05 -0400 Subject: [PATCH 105/176] Add useful logger.info telling us the mismatch and indel context sizes --- .../utils/recalibration/covariates/ContextCovariate.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java index 4c20284d9..13ea0f0b3 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.utils.recalibration.covariates; +import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.recalibration.ReadCovariates; import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.BaseUtils; @@ -43,6 +44,7 @@ import java.util.ArrayList; */ public class ContextCovariate implements StandardCovariate { + private final static Logger logger = Logger.getLogger(ContextCovariate.class); private int mismatchesContextSize; private int indelsContextSize; @@ -63,6 +65,9 @@ public class ContextCovariate implements StandardCovariate { public void initialize(final RecalibrationArgumentCollection RAC) { mismatchesContextSize = RAC.MISMATCHES_CONTEXT_SIZE; indelsContextSize = RAC.INDELS_CONTEXT_SIZE; + + logger.info("Context sizes: base substitution model " + mismatchesContextSize + ", indel substitution model " + indelsContextSize); + if (mismatchesContextSize > MAX_DNA_CONTEXT) throw new UserException.BadArgumentValue("mismatches_context_size", String.format("context size cannot be bigger than %d, but was %d", MAX_DNA_CONTEXT, mismatchesContextSize)); if (indelsContextSize > MAX_DNA_CONTEXT) From 243af0adb19680e8d560842bab793e8e598e19b7 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sun, 12 Aug 2012 13:44:07 -0400 Subject: [PATCH 106/176] Expanded the BQSR reporting script -- Includes header page -- Table of arguments (Arguments) -- Summary of counts (RecalData0) -- Summary of counts by qual (RecalData1) -- Fixed bug in output that resulted in covariates list always being null (updated md5s accordingly) -- BQSR.R loads all relevant libaries now, include gplots, grid, and gsalib to run correctly --- .../walkers/bqsr/BQSRIntegrationTest.java | 28 +++++----- .../sting/utils/recalibration/BQSR.R | 53 +++++++++++++++---- .../bqsr/RecalibrationArgumentCollection.java | 4 +- .../sting/utils/recalibration/RecalUtils.java | 23 ++++++-- 4 files changed, 79 insertions(+), 29 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java index cf6d1cd77..0c212763d 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java @@ -50,20 +50,20 @@ public class BQSRIntegrationTest extends WalkerTest { String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam"; String HiSeqInterval = "chr1:10,000,000-10,100,000"; return new Object[][]{ - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "239ce3387b4540faf44ec000d844ccd1")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "d69127341938910c38166dd18449598d")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "b77e621bed1b0dc57970399a35efd0da")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "2697f38d467a7856c40abce0f778456a")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "a55018b1643ca3964dbb50783db9f3e4")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "54fe8d1f5573845e6a2aa9688f6dd950")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "6b518ad3c56d66c6f5ea812d058f5c4d")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "3ddb9730f00ee3a612b42209ed9f7e03")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "4cd4fb754e1ef142ad691cb35c74dc4c")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "364eab693e5e4c7d18a77726b6460f3f")}, - {new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "c449cfca61d605b534f0dce35581339d")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "5268cb5a4b69335568751d5e5ab80d43")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "3ddb9730f00ee3a612b42209ed9f7e03")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "4a786ba42e38e7fd101947c34a6883ed")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "1cfc73371abb933ca26496745d105ff0")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "ee5142776008741b1b2453b1258c6d99")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "fbc520794f0f98d52159de956f7217f1")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "ab5b93794049c514bf8e407019d76b67")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "81df636e3d0ed6f16113517e0169bc96")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "ad3c47355448f8c45e172c6e1129c65d")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "fef7240140a9b6d6335ce009fa4edec5")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "600652ee49b9ce1ca2d8ee2d8b7c8211")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "769f95b9dcc78a405d3e6b191e5a19f5")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "43fcba51264cc98bd8466d21e1b96766")}, + {new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "48aaf9ac54b97eac3663882a59354ab2")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "dac04b9e1e1c52af8d3a50c2e550fda9")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "600652ee49b9ce1ca2d8ee2d8b7c8211")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "26a04f5a28c40750c603cbe8a926d7bd")}, }; } diff --git a/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R b/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R index 6c4dace1d..4fa1c9739 100644 --- a/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R +++ b/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R @@ -1,8 +1,18 @@ library("ggplot2") +library(gplots) +library("reshape") +library("grid") library("tools") #For compactPDF in R 2.13+ +library(gsalib) -args <- commandArgs(TRUE) + +if ( interactive() ) { + args <- c("NA12878.6.1.dedup.realign.recal.bqsr.grp.csv", "NA12878.6.1.dedup.realign.recal.bqsr.grp", NA) +} else { + args <- commandArgs(TRUE) +} data <- read.csv(args[1]) +gsa.report <- gsa.read.gatkreport(args[2]) data <- within(data, EventType <- factor(EventType, levels = rev(levels(EventType)))) numRG = length(unique(data$ReadGroup)) @@ -82,20 +92,45 @@ for(cov in levels(data$CovariateName)) { # for each covariate in turn p <- ggplot(d, aes(x=CovariateValue)) + xlab(paste(cov,"Covariate")) + - ylab("Number of Observations") + + ylab("No. of Observations (area normalized)") + blankTheme - d <- p + geom_histogram(aes(fill=Recalibration,weight=Observations),alpha=0.6,binwidth=1,position="identity") + scale_fill_manual(values=c("maroon1","blue")) + facet_grid(.~EventType) + - scale_y_continuous(formatter="comma") - + d <- p + geom_histogram(aes(fill=Recalibration,weight=Observations,y=..ndensity..),alpha=0.6,binwidth=1,position="identity") + d <- d + scale_fill_manual(values=c("maroon1","blue")) + d <- d + facet_grid(.~EventType) +# d <- d + scale_y_continuous(formatter="comma") } } -pdf(args[2],height=9,width=15) +if ( ! is.na(args[3]) ) + pdf(args[3],height=9,width=15) + +#frame() +textplot(gsa.report$Arguments, show.rownames=F) +title( + main="GATK BaseRecalibration report", + sub=date()) + distributeGraphRows(list(a,b,c), c(1,1,1)) distributeGraphRows(list(d,e,f), c(1,1,1)) -dev.off() +# format the overall information +rt0 <- data.frame( + ReadGroup = gsa.report$RecalTable0$ReadGroup, + EventType = gsa.report$RecalTable0$EventType, + EmpiricalQuality = sprintf("%.1f", gsa.report$RecalTable0$EmpiricalQuality), + EstimatedQReported = sprintf("%.1f", gsa.report$RecalTable0$EstimatedQReported), + Observations = sprintf("%.2e", gsa.report$RecalTable0$Observations), + Errors = sprintf("%.2e", gsa.report$RecalTable0$Errors)) +textplot(t(rt0), show.colnames=F) +title("Overall error rates by event type") -if (exists('compactPDF')) { - compactPDF(args[2]) +# plot per quality score recalibration table +textplot(gsa.report$RecalTable1, show.rownames=F) +title("Rrror rates by event type and initial quality score") + +if ( ! is.na(args[3]) ) { + dev.off() + if (exists('compactPDF')) { + compactPDF(args[2]) + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java index f04e4a1b3..f4b00925e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java @@ -175,12 +175,12 @@ public class RecalibrationArgumentCollection { public File recalibrationReport = null; - public GATKReportTable generateReportTable() { + public GATKReportTable generateReportTable(final String covariateNames) { GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2); argumentsTable.addColumn("Argument"); argumentsTable.addColumn(RecalUtils.ARGUMENT_VALUE_COLUMN_NAME); argumentsTable.addRowID("covariate", true); - argumentsTable.set("covariate", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, (COVARIATES == null) ? "null" : Utils.join(",", COVARIATES)); + argumentsTable.set("covariate", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, covariateNames); argumentsTable.addRowID("no_standard_covs", true); argumentsTable.set("no_standard_covs", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES); argumentsTable.addRowID("run_without_dbsnp", true); diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java index fe6ef7018..a605c4649 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java @@ -326,9 +326,23 @@ public class RecalUtils { } public static void outputRecalibrationReport(final RecalibrationArgumentCollection RAC, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final PrintStream outputFile) { - outputRecalibrationReport(RAC.generateReportTable(), quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile); + outputRecalibrationReport(RAC.generateReportTable(covariateNames(requestedCovariates)), quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile); } + /** + * Return a human-readable string representing the used covariates + * + * @param requestedCovariates a vector of covariates + * @return a non-null comma-separated string + */ + public static String covariateNames(final Covariate[] requestedCovariates) { + final List names = new ArrayList(requestedCovariates.length); + for ( final Covariate cov : requestedCovariates ) + names.add(cov.getClass().getSimpleName()); + return Utils.join(",", names); + } + + public static void outputRecalibrationReport(final GATKReportTable argumentTable, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final PrintStream outputFile) { outputRecalibrationReport(argumentTable, quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile); } @@ -352,7 +366,7 @@ public class RecalUtils { return new Pair(deltaTableStream, deltaTableFileName); } - private static void outputRecalibrationPlot(Pair files, boolean keepIntermediates) { + private static void outputRecalibrationPlot(final File gatkReportFilename, Pair files, boolean keepIntermediates) { final File csvFileName = files.getSecond(); final File plotFileName = new File(csvFileName + ".pdf"); files.getFirst().close(); @@ -360,6 +374,7 @@ public class RecalUtils { final RScriptExecutor executor = new RScriptExecutor(); executor.addScript(new Resource(SCRIPT_FILE, RecalUtils.class)); executor.addArgs(csvFileName.getAbsolutePath()); + executor.addArgs(gatkReportFilename.getAbsolutePath()); executor.addArgs(plotFileName.getAbsolutePath()); executor.exec(); @@ -372,14 +387,14 @@ public class RecalUtils { public static void generateRecalibrationPlot(final File filename, final RecalibrationTables original, final Covariate[] requestedCovariates, final boolean keepIntermediates) { final Pair files = initializeRecalibrationPlot(filename); writeCSV(files.getFirst(), original, "ORIGINAL", requestedCovariates, true); - outputRecalibrationPlot(files, keepIntermediates); + outputRecalibrationPlot(filename, files, keepIntermediates); } public static void generateRecalibrationPlot(final File filename, final RecalibrationTables original, final RecalibrationTables recalibrated, final Covariate[] requestedCovariates, final boolean keepIntermediates) { final Pair files = initializeRecalibrationPlot(filename); writeCSV(files.getFirst(), recalibrated, "RECALIBRATED", requestedCovariates, true); writeCSV(files.getFirst(), original, "ORIGINAL", requestedCovariates, false); - outputRecalibrationPlot(files, keepIntermediates); + outputRecalibrationPlot(filename, files, keepIntermediates); } private static void writeCSV(final PrintStream deltaTableFile, final RecalibrationTables recalibrationTables, final String recalibrationMode, final Covariate[] requestedCovariates, final boolean printHeader) { From f032e0aba46133bdb2c180b084af83e266256462 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sun, 12 Aug 2012 13:45:52 -0400 Subject: [PATCH 107/176] A bit better output for ContextCovariate context size logging --- .../sting/utils/recalibration/covariates/ContextCovariate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java index 13ea0f0b3..570944245 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java @@ -66,7 +66,7 @@ public class ContextCovariate implements StandardCovariate { mismatchesContextSize = RAC.MISMATCHES_CONTEXT_SIZE; indelsContextSize = RAC.INDELS_CONTEXT_SIZE; - logger.info("Context sizes: base substitution model " + mismatchesContextSize + ", indel substitution model " + indelsContextSize); + logger.info("\t\tContext sizes: base substitution model " + mismatchesContextSize + ", indel substitution model " + indelsContextSize); if (mismatchesContextSize > MAX_DNA_CONTEXT) throw new UserException.BadArgumentValue("mismatches_context_size", String.format("context size cannot be bigger than %d, but was %d", MAX_DNA_CONTEXT, mismatchesContextSize)); From aab417c94d47f48a39dbf3dc8cf5d8d2f6280e46 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sun, 12 Aug 2012 13:58:14 -0400 Subject: [PATCH 108/176] Fix missing argument in unittest --- .../sting/utils/recalibration/RecalibrationReportUnitTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java index 387cc94d6..485da243f 100644 --- a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java @@ -98,7 +98,7 @@ public class RecalibrationReportUnitTest { } Assert.assertEquals(nKeys, expectedKeys); - final RecalibrationReport report = new RecalibrationReport(quantizationInfo, recalibrationTables, RAC.generateReportTable(), RAC); + final RecalibrationReport report = new RecalibrationReport(quantizationInfo, recalibrationTables, RAC.generateReportTable("ignore"), RAC); File output = new File("RecalibrationReportUnitTestOutuput.grp"); PrintStream out; From 4cbd11faf5a7331e1f95daf66d0f86f65c8f4833 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 13 Aug 2012 10:01:33 -0400 Subject: [PATCH 109/176] Fixed spelling error in BQSR.R --- .../scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R b/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R index 4fa1c9739..8a9eecf48 100644 --- a/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R +++ b/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R @@ -126,7 +126,7 @@ title("Overall error rates by event type") # plot per quality score recalibration table textplot(gsa.report$RecalTable1, show.rownames=F) -title("Rrror rates by event type and initial quality score") +title("Error rates by event type and initial quality score") if ( ! is.na(args[3]) ) { dev.off() From 4d3fad38e96b552b70f57ab632ce8afab2abb1f1 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 13 Aug 2012 14:20:26 -0400 Subject: [PATCH 110/176] Increase allowable range for BCF2 by -1 on low-end --- .../broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java index 6fd698ff6..8a9ba87ac 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java @@ -48,8 +48,7 @@ public enum BCF2Type { } }, - // todo -- confirm range - INT8 (1, 1, 0xFFFFFF80, -127, 127) { + INT8 (1, 1, 0xFFFFFF80, -128, 127) { @Override public int read(final InputStream in) throws IOException { return BCF2Utils.readByte(in); @@ -61,7 +60,7 @@ public enum BCF2Type { } }, - INT16(2, 2, 0xFFFF8000, -32767, 32767) { + INT16(2, 2, 0xFFFF8000, -32768, 32767) { @Override public int read(final InputStream in) throws IOException { final int b2 = BCF2Utils.readByte(in) & 0xFF; @@ -77,7 +76,7 @@ public enum BCF2Type { } }, - INT32(3, 4, 0x80000000, -2147483647, 2147483647) { + INT32(3, 4, 0x80000000, -2147483648, 2147483647) { @Override public int read(final InputStream in) throws IOException { final int b4 = BCF2Utils.readByte(in) & 0xFF; From 6ad75d2f5c72cbfe5e4e9d02670a50520d42b74f Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 13 Aug 2012 15:06:28 -0400 Subject: [PATCH 111/176] Reverting changes to BCF2 ranges -- The previously expanded ones are actually the missing values in the range. The previous ranges were correct. Removed the TODO to confirm them, as they are now officially confirmed --- .../broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java index 8a9ba87ac..1162a5d1e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java @@ -48,7 +48,7 @@ public enum BCF2Type { } }, - INT8 (1, 1, 0xFFFFFF80, -128, 127) { + INT8 (1, 1, 0xFFFFFF80, -127, 127) { @Override public int read(final InputStream in) throws IOException { return BCF2Utils.readByte(in); @@ -60,7 +60,7 @@ public enum BCF2Type { } }, - INT16(2, 2, 0xFFFF8000, -32768, 32767) { + INT16(2, 2, 0xFFFF8000, -32767, 32767) { @Override public int read(final InputStream in) throws IOException { final int b2 = BCF2Utils.readByte(in) & 0xFF; @@ -76,7 +76,7 @@ public enum BCF2Type { } }, - INT32(3, 4, 0x80000000, -2147483648, 2147483647) { + INT32(3, 4, 0x80000000, -2147483647, 2147483647) { @Override public int read(final InputStream in) throws IOException { final int b4 = BCF2Utils.readByte(in) & 0xFF; From 22b4466cf546a587a55a92eb23a624dd249b69db Mon Sep 17 00:00:00 2001 From: Khalid Shakir Date: Mon, 13 Aug 2012 15:37:35 -0400 Subject: [PATCH 112/176] Added setupRetry() to modify jobs when Queue is run with '-retry' and jobs are about to restart after an error. Implemented a mixin called "RetryMemoryLimit" which will by default double the memory. GridEngine memory request parameter can be selected on the command line via '-resMemReqParam mem_free' or '-resMemReqParam virtual_free'. Java optimizations now enabled by default: - Only 4 GC threads instead of each job using java's default O(number of cores) GC threads. Previously on a machine with N cores if you have N jobs running and java allocates N GC threads by default, then the machines are using up to N^2 threads if all jobs are in heavy GC (thanks elauzier). - Exit if GC spends more than 50% of time in GC (thanks ktibbett). - Exit if GC reclaims lest than 10% of max heap (thanks ktibbett). Added a -noGCOpt command line option to disable new java optimizations. --- .../examples/ExampleRetryMemoryLimit.scala | 22 ++++++ .../examples/ExampleUnifiedGenotyper.scala | 7 +- .../sting/queue/QSettings.scala | 9 +++ .../sting/queue/engine/FunctionEdge.scala | 19 ++--- .../sting/queue/engine/QGraph.scala | 29 +++---- .../gridengine/GridEngineJobRunner.scala | 10 +-- .../queue/function/CommandLineFunction.scala | 11 ++- .../function/JavaCommandLineFunction.scala | 52 ++++++++++++- .../sting/queue/function/QFunction.scala | 12 +++ .../queue/function/RetryMemoryLimit.scala | 78 +++++++++++++++++++ .../ExampleRetryMemoryLimitPipelineTest.scala | 44 +++++++++++ .../ExampleUnifiedGenotyperPipelineTest.scala | 47 ++++++----- 12 files changed, 279 insertions(+), 61 deletions(-) create mode 100644 public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleRetryMemoryLimit.scala create mode 100644 public/scala/src/org/broadinstitute/sting/queue/function/RetryMemoryLimit.scala create mode 100644 public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleRetryMemoryLimitPipelineTest.scala diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleRetryMemoryLimit.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleRetryMemoryLimit.scala new file mode 100644 index 000000000..09a24e782 --- /dev/null +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleRetryMemoryLimit.scala @@ -0,0 +1,22 @@ +import org.broadinstitute.sting.queue.function.RetryMemoryLimit +import org.broadinstitute.sting.queue.QScript +import org.broadinstitute.sting.queue.extensions.gatk._ + +class ExampleRetryMemoryLimit extends QScript { + @Input(doc="The reference file for the bam files.", shortName="R") + var referenceFile: File = _ + + @Input(doc="Bam file to genotype.", shortName="I") + var bamFile: File = _ + + def script() { + val ug = new UnifiedGenotyper with RetryMemoryLimit + // First run with 1m + ug.memoryLimit = .001 + // On retry run with 1g + ug.retryMemoryFunction = (d => d * 1000) + ug.reference_sequence = referenceFile + ug.input_file = Seq(bamFile) + add(ug) + } +} diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala index 8cb86db0b..f5d750ac3 100644 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala @@ -5,7 +5,8 @@ import org.broadinstitute.sting.queue.extensions.gatk._ /** * An example building on the intro ExampleCountReads.scala. - * Runs an INCOMPLETE version of the UnifiedGenotyper with VariantEval and optional VariantFiltration. + * Runs an INCOMPLETE variant calling pipeline with just the UnifiedGenotyper, VariantEval and optional VariantFiltration. + * For a complete description of the suggested for a variant calling pipeline see the latest version of the Best Practice Variant Detection document */ class ExampleUnifiedGenotyper extends QScript { // Create an alias 'qscript' to be able to access variables @@ -43,14 +44,12 @@ class ExampleUnifiedGenotyper extends QScript { } def script() { - // Create the four function that we can run. + // Create the four functions that we may run depending on options. val genotyper = new UnifiedGenotyper with UnifiedGenotyperArguments val variantFilter = new VariantFiltration with UnifiedGenotyperArguments val evalUnfiltered = new VariantEval with UnifiedGenotyperArguments val evalFiltered = new VariantEval with UnifiedGenotyperArguments - // If you are running this on a compute farm, make sure that the Sting/shell - // folder is in your path to use mergeText.sh and splitIntervals.sh. genotyper.scatterCount = 3 genotyper.input_file :+= qscript.bamFile genotyper.out = swapExt(qscript.bamFile, "bam", "unfiltered.vcf") diff --git a/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala b/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala index d9fed4ce8..1a50301f1 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala @@ -55,12 +55,18 @@ class QSettings { @Argument(fullName="memory_limit", shortName="memLimit", doc="Default memory limit for jobs, in gigabytes.", required=false) var memoryLimit: Option[Double] = None + @Argument(fullName="memory_limit_threshold", shortName="memLimitThresh", doc="After passing this threshold stop increasing memory limit for jobs, in gigabytes.", required=false) + var memoryLimitThreshold: Option[Double] = None + @Argument(fullName="resident_memory_limit", shortName="resMemLimit", doc="Default resident memory limit for jobs, in gigabytes.", required=false) var residentLimit: Option[Double] = None @Argument(fullName="resident_memory_request", shortName="resMemReq", doc="Default resident memory request for jobs, in gigabytes.", required=false) var residentRequest: Option[Double] = None + @Argument(fullName="resident_memory_request_parameter", shortName="resMemReqParam", doc="Parameter for resident memory requests. By default not requested.", required=false) + var residentRequestParameter: String = _ + /** The name of the parallel environment (required for SGE, for example) */ @Argument(fullName="job_parallel_env", shortName="jobParaEnv", doc="An SGE style parallel environment to use for jobs requesting more than 1 core. Equivalent to submitting jobs with -pe ARG nt for jobs with nt > 1", required=false) var parallelEnvironmentName: String = "smp_pe" // Broad default @@ -68,6 +74,9 @@ class QSettings { @Argument(fullName="dontRequestMultipleCores", shortName="multiCoreJerk", doc="If provided, Queue will not request multiple processors for jobs using multiple processors. Sometimes you eat the bear, sometimes the bear eats you.", required=false) var dontRequestMultipleCores: Boolean = false + @Argument(fullName="disableDefaultJavaGCOptimizations", shortName="noGCOpt", doc="If provided, Queue will not ensure that java GC threads are limited and that the a minimum amount of time is spent in GC.") + var disableDefaultJavaGCOptimizations = false + @Argument(fullName="run_directory", shortName="runDir", doc="Root directory to run functions from.", required=false) var runDirectory = new File(".") diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala index 8225d28ab..2d4ff60f5 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala @@ -40,11 +40,6 @@ import org.apache.commons.lang.StringUtils class FunctionEdge(val function: QFunction, val inputs: QNode, val outputs: QNode) extends QEdge with Logging { var runner: JobRunner[_] =_ - /** - * The number of times this edge has been run. - */ - var retries = 0 - /** * The depth of this edge in the graph. */ @@ -87,14 +82,14 @@ class FunctionEdge(val function: QFunction, val inputs: QNode, val outputs: QNod runner.init() runner.start() } catch { - case e => + case e: Throwable => currentStatus = RunnerStatus.FAILED try { runner.cleanup() function.failOutputs.foreach(_.createNewFile()) writeStackTrace(e) } catch { - case _ => /* ignore errors in the exception handler */ + case _: Throwable => /* ignore errors in the exception handler */ } logger.error("Error: " + function.description, e) } @@ -114,7 +109,7 @@ class FunctionEdge(val function: QFunction, val inputs: QNode, val outputs: QNod runner.cleanup() function.failOutputs.foreach(_.createNewFile()) } catch { - case _ => /* ignore errors in the error handler */ + case _: Throwable => /* ignore errors in the error handler */ } logger.error("Error: " + function.description) tailError() @@ -123,19 +118,19 @@ class FunctionEdge(val function: QFunction, val inputs: QNode, val outputs: QNod runner.cleanup() function.doneOutputs.foreach(_.createNewFile()) } catch { - case _ => /* ignore errors in the done handler */ + case _: Throwable => /* ignore errors in the done handler */ } logger.info("Done: " + function.description) } } catch { - case e => + case e: Throwable => currentStatus = RunnerStatus.FAILED try { runner.cleanup() function.failOutputs.foreach(_.createNewFile()) writeStackTrace(e) } catch { - case _ => /* ignore errors in the exception handler */ + case _: Throwable => /* ignore errors in the exception handler */ } logger.error("Error retrieving status: " + function.description, e) } @@ -168,6 +163,7 @@ class FunctionEdge(val function: QFunction, val inputs: QNode, val outputs: QNod currentStatus = RunnerStatus.PENDING if (cleanOutputs) function.deleteOutputs() + function.jobErrorLines = Nil runner = null } @@ -189,6 +185,7 @@ class FunctionEdge(val function: QFunction, val inputs: QNode, val outputs: QNod val tailLines = IOUtils.tail(errorFile, maxLines) val nl = "%n".format() val summary = if (tailLines.size > maxLines) "Last %d lines".format(maxLines) else "Contents" + this.function.jobErrorLines = collection.JavaConversions.asScalaIterable(tailLines).toSeq logger.error("%s of %s:%n%s".format(summary, errorFile, StringUtils.join(tailLines, nl))) } else { logger.error("Unable to access log file: %s".format(errorFile)) diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala index cee2c6e56..e3a1714ff 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala @@ -116,7 +116,7 @@ class QGraph extends Logging { val isReady = numMissingValues == 0 if (this.jobGraph.edgeSet.isEmpty) { - logger.warn("Nothing to run! Were any Functions added?"); + logger.warn("Nothing to run! Were any Functions added?") } else if (settings.getStatus) { logger.info("Checking pipeline status.") logStatus() @@ -320,7 +320,7 @@ class QGraph extends Logging { if (settings.startFromScratch) logger.info("Will remove outputs from previous runs.") - updateGraphStatus(false) + updateGraphStatus(cleanOutputs = false) var readyJobs = getReadyJobs while (running && readyJobs.size > 0) { @@ -361,7 +361,7 @@ class QGraph extends Logging { * Logs job statuses by traversing the graph and looking for status-related files */ private def logStatus() { - updateGraphStatus(false) + updateGraphStatus(cleanOutputs = false) doStatus(status => logger.info(status)) } @@ -388,7 +388,7 @@ class QGraph extends Logging { if (settings.startFromScratch) logger.info("Removing outputs from previous runs.") - updateGraphStatus(true) + updateGraphStatus(cleanOutputs = true) var readyJobs = TreeSet.empty[FunctionEdge](functionOrdering) readyJobs ++= getReadyJobs @@ -473,7 +473,7 @@ class QGraph extends Logging { logStatusCounts() deleteCleanup(-1) } catch { - case e => + case e: Throwable => logger.error("Uncaught error running jobs.", e) throw e } finally { @@ -662,11 +662,12 @@ class QGraph extends Logging { private def checkRetryJobs(failed: Set[FunctionEdge]) { if (settings.retries > 0) { for (failedJob <- failed) { - if (failedJob.function.jobRestartable && failedJob.retries < settings.retries) { - failedJob.retries += 1 - failedJob.resetToPending(true) + if (failedJob.function.jobRestartable && failedJob.function.retries < settings.retries) { + failedJob.function.retries += 1 + failedJob.function.setupRetry() + failedJob.resetToPending(cleanOutputs = true) logger.info("Reset for retry attempt %d of %d: %s".format( - failedJob.retries, settings.retries, failedJob.function.description)) + failedJob.function.retries, settings.retries, failedJob.function.description)) statusCounts.failed -= 1 statusCounts.pending += 1 } else { @@ -733,7 +734,7 @@ class QGraph extends Logging { private def emailDescription(edge: FunctionEdge) = { val description = new StringBuilder if (settings.retries > 0) - description.append("Attempt %d of %d.%n".format(edge.retries + 1, settings.retries + 1)) + description.append("Attempt %d of %d.%n".format(edge.function.retries + 1, settings.retries + 1)) description.append(edge.function.description) description.toString() } @@ -1077,7 +1078,7 @@ class QGraph extends Logging { runner.checkUnknownStatus() } } catch { - case e => /* ignore */ + case e: Throwable => /* ignore */ } } } @@ -1119,20 +1120,20 @@ class QGraph extends Logging { try { manager.tryStop(managerRunners) } catch { - case e => /* ignore */ + case e: Throwable => /* ignore */ } for (runner <- managerRunners) { try { runner.cleanup() } catch { - case e => /* ignore */ + case e: Throwable => /* ignore */ } } } finally { try { manager.exit() } catch { - case e => /* ignore */ + case e: Throwable => /* ignore */ } } } diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/gridengine/GridEngineJobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/gridengine/GridEngineJobRunner.scala index 76cefe2a5..0c94e9ecf 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/gridengine/GridEngineJobRunner.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/gridengine/GridEngineJobRunner.scala @@ -52,13 +52,9 @@ class GridEngineJobRunner(session: Session, function: CommandLineFunction) exten nativeSpec += " -q " + function.jobQueue // If the resident set size is requested pass on the memory request - // NOTE: 12/20/11: depristo commented this out because mem_free isn't - // such a standard feature in SGE (gsa-engineering queue doesn't support it) - // requiring it can make SGE not so usable. It's dangerous to not enforce - // that we have enough memory to run our jobs, but I'd rather be dangerous - // than not be able to run my jobs at all. -// if (function.residentRequest.isDefined) -// nativeSpec += " -l mem_free=%dM".format(function.residentRequest.map(_ * 1024).get.ceil.toInt) + // mem_free is the standard, but may also be virtual_free or even not available + if (function.qSettings.residentRequestParameter != null && function.residentRequest.isDefined) + nativeSpec += " -l %s=%dM".format(function.qSettings.residentRequestParameter, function.residentRequest.map(_ * 1024).get.ceil.toInt) // If the resident set size limit is defined specify the memory limit if (function.residentLimit.isDefined) diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala index eff4a2ba9..84b625760 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala @@ -137,12 +137,17 @@ trait CommandLineFunction extends QFunction with Logging { if (residentRequest.isEmpty) residentRequest = memoryLimit - if (residentLimit.isEmpty) - residentLimit = residentRequest.map( _ * 1.2 ) + if (residentLimit.isEmpty || residentLimit == residentRequest) + residentLimit = residentRequest.map(residentLimitBuffer) super.freezeFieldValues() } + /** + * @return A function that decides how much memory cushion to add to the residentRequest to create the residentLimit + */ + def residentLimitBuffer: (Double => Double) = (1.2 * _) + /** * Safely construct a full required command-line argument with consistent quoting, whitespace separation, etc. * @@ -223,7 +228,7 @@ trait CommandLineFunction extends QFunction with Logging { */ protected def conditional( condition: Boolean, param: Any, escape: Boolean = true, format: String = "%s" ): String = { if ( condition ) { - " %s ".format(formatArgument("", param, "", false, escape, format)) + " %s ".format(formatArgument("", param, "", spaceSeparated = false, escape = escape, paramFormat = format)) } else { "" diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/JavaCommandLineFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/function/JavaCommandLineFunction.scala index 13448afdd..b9cb8540f 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/function/JavaCommandLineFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/function/JavaCommandLineFunction.scala @@ -54,6 +54,16 @@ trait JavaCommandLineFunction extends CommandLineFunction { */ var javaGCThreads: Option[Int] = None + /** + * Max percent of time spent in garbage collection + */ + var javaGCTimeLimit: Option[Int] = None + + /** + * Min percent of max heap freed during a garbage collection + */ + var javaGCHeapFreeLimit: Option[Int] = None + override def freezeFieldValues() { super.freezeFieldValues() @@ -62,6 +72,37 @@ trait JavaCommandLineFunction extends CommandLineFunction { if (javaMainClass != null && javaClasspath.isEmpty) javaClasspath = JavaCommandLineFunction.currentClasspath + + if (!this.qSettings.disableDefaultJavaGCOptimizations) { + // By default set the GC threads to 4 + if (javaGCThreads.isEmpty) + javaGCThreads = Some(4) + + // By default exit if more than 50% of time in GC + if (javaGCTimeLimit.isEmpty) + javaGCTimeLimit = Some(50) + + // By default exit if GC does not free up 10% of the heap + if (javaGCHeapFreeLimit.isEmpty) + javaGCHeapFreeLimit = Some(10) + } + } + + + override def copySettingsTo(function: QFunction) { + super.copySettingsTo(function) + function match { + case java: JavaCommandLineFunction => + if (java.javaMemoryLimit.isEmpty) + java.javaMemoryLimit = this.javaMemoryLimit + if (java.javaGCThreads.isEmpty) + java.javaGCThreads = this.javaGCThreads + if (java.javaGCTimeLimit.isEmpty) + java.javaGCTimeLimit = this.javaGCTimeLimit + if (java.javaGCHeapFreeLimit.isEmpty) + java.javaGCHeapFreeLimit = this.javaGCHeapFreeLimit + case _ => /* ignore */ + } } /** @@ -77,10 +118,13 @@ trait JavaCommandLineFunction extends CommandLineFunction { null } - def javaOpts = optional("-Xmx", javaMemoryLimit.map(gb => (gb * 1024).ceil.toInt), "m", spaceSeparated=false) + - conditional(javaGCThreads.isDefined, "-XX:+UseParallelOldGC") + - optional("-XX:ParallelGCThreads=", javaGCThreads, spaceSeparated=false) + - required("-Djava.io.tmpdir=", jobTempDir, spaceSeparated=false) + def javaOpts = Array( + optional("-Xmx", javaMemoryLimit.map(gb => (gb * 1024).ceil.toInt), "m", spaceSeparated=false), + conditional(javaGCThreads.isDefined || javaGCTimeLimit.isDefined || javaGCHeapFreeLimit.isDefined, "-XX:+UseParallelOldGC"), + optional("-XX:ParallelGCThreads=", javaGCThreads, spaceSeparated=false), + optional("-XX:GCTimeLimit=", javaGCTimeLimit, spaceSeparated=false), + optional("-XX:GCHeapFreeLimit=", javaGCHeapFreeLimit, spaceSeparated=false), + required("-Djava.io.tmpdir=", jobTempDir, spaceSeparated=false)).mkString("") def commandLine = required("java") + javaOpts + diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala index 7d9debbdc..9f7932d39 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala @@ -112,6 +112,18 @@ trait QFunction extends Logging with QJobReport { /** File to redirect any errors. Defaults to .out */ var jobErrorFile: File = _ + /** Errors (if any) from the last failed run of jobErrorFiles. */ + var jobErrorLines: Seq[String] = Nil + + /** + * The number of times this function has previously been run. + */ + var retries = 0 + + /** Change settings for the next run. Retries will be set to the number of times the function was run and jobErrorLines may contain the error text. */ + def setupRetry() { + } + /** * Description of this command line function. */ diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/RetryMemoryLimit.scala b/public/scala/src/org/broadinstitute/sting/queue/function/RetryMemoryLimit.scala new file mode 100644 index 000000000..8bba5551f --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/function/RetryMemoryLimit.scala @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.queue.function + +/** A mixin that on retry increases the memory limit when certain text is found. */ +trait RetryMemoryLimit extends CommandLineFunction { + + /** How to increase the memory. By default doubles the memory. */ + var retryMemoryFunction: (Double => Double) = (2 * _) + + /** Once the threshold is passed, no more memory will be added to memory limit. */ + var memoryLimitThreshold: Option[Double] = None + + /** Various strings to look for to determine we ran out of memory. */ + var memoryLimitErrorText = Seq("OutOfMemory", "you did not provide enough memory", "TERM_MEMLIMIT") + + override def freezeFieldValues() { + super.freezeFieldValues() + if (this.memoryLimitThreshold.isEmpty) + this.memoryLimitThreshold = this.qSettings.memoryLimitThreshold + } + + override def setupRetry() { + super.setupRetry() + if (this.memoryLimitThreshold.isDefined && this.memoryLimit.isDefined) { + + // NOTE: If we're already at or above the memoryLimit, don't do anything. + if (this.memoryLimit.get < this.memoryLimitThreshold.get) { + updateMemoryLimits() + } + + } else { + updateMemoryLimits() + } + } + + def updateMemoryLimits() { + if (isMemoryError) { + this.memoryLimit = this.memoryLimit.map(this.retryMemoryFunction) + this.residentRequest = this.residentRequest.map(this.retryMemoryFunction) + this.residentLimit = this.residentLimit.map(this.retryMemoryFunction) + + // Rebuffer the memory limit if the limit was set exactly to the request + if (this.residentLimit == this.residentRequest) + this.residentLimit = this.residentRequest.map(this.residentLimitBuffer) + + this match { + case java: JavaCommandLineFunction => + java.javaMemoryLimit = java.javaMemoryLimit.map(this.retryMemoryFunction) + case _ => /* ignore */ + } + } + } + + def isMemoryError = this.jobErrorLines.exists(line => this.memoryLimitErrorText.exists(error => line.contains(error))) +} diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleRetryMemoryLimitPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleRetryMemoryLimitPipelineTest.scala new file mode 100644 index 000000000..a9a5928fc --- /dev/null +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleRetryMemoryLimitPipelineTest.scala @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.queue.pipeline.examples + +import org.testng.annotations.Test +import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec} +import org.broadinstitute.sting.BaseTest + +class ExampleRetryMemoryLimitPipelineTest { + @Test + def testRetryMemoryLimit() { + val spec = new PipelineTestSpec + spec.name = "RetryMemoryLimit" + spec.args = Array( + " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleRetryMemoryLimit.scala", + " -R " + BaseTest.publicTestDir + "exampleFASTA.fasta", + " -I " + BaseTest.publicTestDir + "exampleBAM.bam", + " -retry 1").mkString + spec.jobRunners = PipelineTest.allJobRunners + PipelineTest.executeTest(spec) + } +} diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala index c9d8b59c9..f6fcd7c12 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala @@ -24,7 +24,7 @@ package org.broadinstitute.sting.queue.pipeline.examples -import org.testng.annotations.Test +import org.testng.annotations.{DataProvider, Test} import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec} import org.broadinstitute.sting.BaseTest @@ -43,42 +43,53 @@ class ExampleUnifiedGenotyperPipelineTest { PipelineTest.executeTest(spec) } - @Test - def testUnifiedGenotyperWithGatkIntervals() { + @DataProvider(name = "ugIntervals") + def getUnifiedGenotyperIntervals = + Array( + Array("gatk_intervals", BaseTest.validationDataLocation + "intervalTest.intervals"), + Array("bed_intervals", BaseTest.validationDataLocation + "intervalTest.bed"), + Array("vcf_intervals", BaseTest.validationDataLocation + "intervalTest.1.vcf") + ).asInstanceOf[Array[Array[Object]]] + + @Test(dataProvider = "ugIntervals") + def testUnifiedGenotyperWithIntervals(intervalsName: String, intervalsPath: String) { val spec = new PipelineTestSpec - spec.name = "unifiedgenotyper_with_gatk_intervals" + spec.name = "unifiedgenotyper_with_" + intervalsName spec.args = Array( " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala", " -I " + BaseTest.validationDataLocation + "OV-0930.normal.chunk.bam", " -R " + BaseTest.hg18Reference, - " -L " + BaseTest.validationDataLocation + "intervalTest.intervals").mkString + " -L " + intervalsPath).mkString spec.jobRunners = Seq("Lsf706") PipelineTest.executeTest(spec) } @Test - def testUnifiedGenotyperWithBedIntervals() { + def testUnifiedGenotyperNoGCOpt() { val spec = new PipelineTestSpec - spec.name = "unifiedgenotyper_with_bed_intervals" + spec.name = "unifiedgenotyper_no_gc_opt" spec.args = Array( " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala", - " -I " + BaseTest.validationDataLocation + "OV-0930.normal.chunk.bam", - " -R " + BaseTest.hg18Reference, - " -L " + BaseTest.validationDataLocation + "intervalTest.bed").mkString - spec.jobRunners = Seq("Lsf706") + " -R " + BaseTest.publicTestDir + "exampleFASTA.fasta", + " -I " + BaseTest.publicTestDir + "exampleBAM.bam", + " -noGCOpt").mkString + spec.jobRunners = PipelineTest.allJobRunners PipelineTest.executeTest(spec) } - @Test - def testUnifiedGenotyperWithVcfIntervals() { + @DataProvider(name="resMemReqParams") + def getResMemReqParam = Array(Array("mem_free"), Array("virtual_free")).asInstanceOf[Array[Array[Object]]] + + @Test(dataProvider = "resMemReqParams") + def testUnifiedGenotyperResMemReqParam(reqParam: String) { val spec = new PipelineTestSpec - spec.name = "unifiedgenotyper_with_vcf_intervals" + spec.name = "unifiedgenotyper_" + reqParam spec.args = Array( " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala", - " -I " + BaseTest.validationDataLocation + "OV-0930.normal.chunk.bam", - " -R " + BaseTest.hg18Reference, - " -L " + BaseTest.validationDataLocation + "intervalTest.1.vcf").mkString - spec.jobRunners = Seq("Lsf706") + " -R " + BaseTest.publicTestDir + "exampleFASTA.fasta", + " -I " + BaseTest.publicTestDir + "exampleBAM.bam", + " -resMemReqParam " + reqParam).mkString + spec.jobRunners = Seq("GridEngine") PipelineTest.executeTest(spec) } } From f809f24afbabddf1e2558e5532f84ec6b2d3a209 Mon Sep 17 00:00:00 2001 From: Khalid Shakir Date: Mon, 13 Aug 2012 16:49:27 -0400 Subject: [PATCH 113/176] Removed SelectHeader's --include_reference_name option since the reference is always included. In SelectHeaders instead of including the path to the file, only include the name of the reference since dbGaP does not like paths in headers. --- .../walkers/variantutils/SelectHeaders.java | 12 +----- .../sting/utils/codecs/vcf/VCFUtils.java | 43 ++++++++++++------- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java index f14f6c2a6..46a3a8cd1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java @@ -120,12 +120,6 @@ public class SelectHeaders extends RodWalker implements TreeRe @Argument(fullName = "exclude_header_name", shortName = "xl_hn", doc = "Exclude header. Can be specified multiple times", required = false) public Set XLheaderNames; - /** - * Note that reference inclusion takes precedence over other header matching. If set other reference lines may be excluded but the file name will still be added. - */ - @Argument(fullName = "include_reference_name", shortName = "irn", doc = "If set the reference file name minus the file extension will be added to the headers", required = false) - public boolean includeReference; - /** * Note that interval name inclusion takes precedence over other header matching. If set other interval lines may be excluded but the intervals will still be added. */ @@ -162,10 +156,6 @@ public class SelectHeaders extends RodWalker implements TreeRe // Select only the headers requested by name or expression. headerLines = new LinkedHashSet(getSelectedHeaders(headerLines)); - // Optionally add in the reference. - if (includeReference && getToolkit().getArguments().referenceFile != null) - headerLines.add(new VCFHeaderLine(VCFHeader.REFERENCE_KEY, FilenameUtils.getBaseName(getToolkit().getArguments().referenceFile.getName()))); - // Optionally add in the intervals. if (includeIntervals && getToolkit().getArguments().intervals != null) { for (IntervalBinding intervalBinding : getToolkit().getArguments().intervals) { @@ -205,7 +195,7 @@ public class SelectHeaders extends RodWalker implements TreeRe selectedHeaders = ListFileUtils.excludeMatching(selectedHeaders, headerKey, XLheaderNames, true); // always include the contig lines - selectedHeaders = VCFUtils.withUpdatedContigsAsLines(selectedHeaders, getToolkit().getArguments().referenceFile, getToolkit().getMasterSequenceDictionary()); + selectedHeaders = VCFUtils.withUpdatedContigsAsLines(selectedHeaders, getToolkit().getArguments().referenceFile, getToolkit().getMasterSequenceDictionary(), true); return selectedHeaders; } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java index f80b0eae4..561e8e78d 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.codecs.vcf; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; +import org.apache.commons.io.FilenameUtils; import org.apache.log4j.Logger; import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.RodBinding; @@ -165,12 +166,13 @@ public class VCFUtils { if ( map.containsKey(key) ) { VCFHeaderLine other = map.get(key); - if ( line.equals(other) ) - continue; - else if ( ! line.getClass().equals(other.getClass()) ) + if ( line.equals(other) ) { + // continue; + } else if ( ! line.getClass().equals(other.getClass()) ) { throw new IllegalStateException("Incompatible header types: " + line + " " + other ); - else if ( line instanceof VCFFilterHeaderLine ) { - String lineName = ((VCFFilterHeaderLine) line).getID(); String otherName = ((VCFFilterHeaderLine) other).getID(); + } else if ( line instanceof VCFFilterHeaderLine ) { + String lineName = ((VCFFilterHeaderLine) line).getID(); + String otherName = ((VCFFilterHeaderLine) other).getID(); if ( ! lineName.equals(otherName) ) throw new IllegalStateException("Incompatible header types: " + line + " " + other ); } else if ( line instanceof VCFCompoundHeaderLine ) { @@ -198,7 +200,7 @@ public class VCFUtils { throw new IllegalStateException("Incompatible header types, collision between these two types: " + line + " " + other ); } } - if ( ! compLine.getDescription().equals(compOther) ) + if ( ! compLine.getDescription().equals(compOther.getDescription()) ) conflictWarner.warn(line, "Allowing unequal description fields through: keeping " + compOther + " excluding " + compLine); } else { // we are not equal, but we're not anything special either @@ -235,7 +237,7 @@ public class VCFUtils { * @param header the header to update * @param engine the GATK engine containing command line arguments and the master sequence dictionary */ - public final static VCFHeader withUpdatedContigs(final VCFHeader header, final GenomeAnalysisEngine engine) { + public static VCFHeader withUpdatedContigs(final VCFHeader header, final GenomeAnalysisEngine engine) { return VCFUtils.withUpdatedContigs(header, engine.getArguments().referenceFile, engine.getMasterSequenceDictionary()); } @@ -246,11 +248,15 @@ public class VCFUtils { * @param referenceFile the file path to the reference sequence used to generate this vcf * @param refDict the SAM formatted reference sequence dictionary */ - public final static VCFHeader withUpdatedContigs(final VCFHeader oldHeader, final File referenceFile, final SAMSequenceDictionary refDict) { + public static VCFHeader withUpdatedContigs(final VCFHeader oldHeader, final File referenceFile, final SAMSequenceDictionary refDict) { return new VCFHeader(withUpdatedContigsAsLines(oldHeader.getMetaDataInInputOrder(), referenceFile, refDict), oldHeader.getGenotypeSamples()); } - public final static Set withUpdatedContigsAsLines(final Set oldLines, final File referenceFile, final SAMSequenceDictionary refDict) { + public static Set withUpdatedContigsAsLines(final Set oldLines, final File referenceFile, final SAMSequenceDictionary refDict) { + return withUpdatedContigsAsLines(oldLines, referenceFile, refDict, false); + } + + public static Set withUpdatedContigsAsLines(final Set oldLines, final File referenceFile, final SAMSequenceDictionary refDict, boolean referenceNameOnly) { final Set lines = new LinkedHashSet(oldLines.size()); for ( final VCFHeaderLine line : oldLines ) { @@ -264,17 +270,24 @@ public class VCFUtils { for ( final VCFHeaderLine contigLine : makeContigHeaderLines(refDict, referenceFile) ) lines.add(contigLine); - lines.add(new VCFHeaderLine(VCFHeader.REFERENCE_KEY, "file://" + referenceFile.getAbsolutePath())); + String referenceValue; + if (referenceFile != null) { + if (referenceNameOnly) + referenceValue = FilenameUtils.getBaseName(referenceFile.getName()); + else + referenceValue = "file://" + referenceFile.getAbsolutePath(); + lines.add(new VCFHeaderLine(VCFHeader.REFERENCE_KEY, referenceValue)); + } return lines; } /** * Create VCFHeaderLines for each refDict entry, and optionally the assembly if referenceFile != null - * @param refDict + * @param refDict reference dictionary * @param referenceFile for assembly name. May be null - * @return + * @return list of vcf contig header lines */ - public final static List makeContigHeaderLines(final SAMSequenceDictionary refDict, + public static List makeContigHeaderLines(final SAMSequenceDictionary refDict, final File referenceFile) { final List lines = new ArrayList(); final String assembly = referenceFile != null ? getReferenceAssembly(referenceFile.getName()) : null; @@ -283,7 +296,7 @@ public class VCFUtils { return lines; } - private final static VCFContigHeaderLine makeContigHeaderLine(final SAMSequenceRecord contig, final String assembly) { + private static VCFContigHeaderLine makeContigHeaderLine(final SAMSequenceRecord contig, final String assembly) { final Map map = new LinkedHashMap(3); map.put("ID", contig.getSequenceName()); map.put("length", String.valueOf(contig.getSequenceLength())); @@ -291,7 +304,7 @@ public class VCFUtils { return new VCFContigHeaderLine(VCFHeader.CONTIG_KEY, map, contig.getSequenceIndex()); } - private final static String getReferenceAssembly(final String refPath) { + private static String getReferenceAssembly(final String refPath) { // This doesn't need to be perfect as it's not a required VCF header line, but we might as well give it a shot String assembly = null; if (refPath.contains("b37") || refPath.contains("v37")) From cfb994abd20a5f17c155dd8a644cfb6372150a79 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 13 Aug 2012 22:55:02 -0400 Subject: [PATCH 114/176] Trivial removal of ununsed variable (mentioned in resolved JIRA entry) --- .../sting/gatk/arguments/GATKArgumentCollection.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 1e6920b82..4c9235b58 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -57,8 +57,6 @@ public class GATKArgumentCollection { public GATKArgumentCollection() { } - public Map walkerArgs = new HashMap(); - // parameters and their defaults @Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false) public List samFiles = new ArrayList(); From 34b62fa092222d068dd193217aa3d3652ecdad2f Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 14 Aug 2012 12:54:31 -0400 Subject: [PATCH 115/176] Two changes to SelectVariants: 1) don't add DP INFO annotation if DP wasn't used in the input VCF (it was adding DP=0 previously). 2) If MLEAC or MLEAF is present in the original VCF and the number of samples decreases, remove those annotations from the VC. --- .../walkers/variantutils/SelectVariants.java | 16 +++++++++--- .../SelectVariantsIntegrationTest.java | 26 +++++++++++++++++++ 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index cf528de09..1493815ee 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -730,7 +730,13 @@ public class SelectVariants extends RodWalker implements TreeR if ( vc.getAlleles().size() != sub.getAlleles().size() ) newGC = VariantContextUtils.stripPLs(sub.getGenotypes()); - //Remove a fraction of the genotypes if needed + // if we have fewer samples in the selected VC than in the original VC, we need to strip out the MLE tags + if ( vc.getNSamples() != sub.getNSamples() ) { + builder.rmAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY); + builder.rmAttribute(VCFConstants.MLE_ALLELE_FREQUENCY_KEY); + } + + // Remove a fraction of the genotypes if needed if ( fractionGenotypes > 0 ){ ArrayList genotypes = new ArrayList(); for ( Genotype genotype : newGC ) { @@ -767,17 +773,21 @@ public class SelectVariants extends RodWalker implements TreeR VariantContextUtils.calculateChromosomeCounts(builder, false); + boolean sawDP = false; int depth = 0; for (String sample : originalVC.getSampleNames()) { Genotype g = originalVC.getGenotype(sample); if ( ! g.isFiltered() ) { - if ( g.hasDP() ) + if ( g.hasDP() ) { depth += g.getDP(); + sawDP = true; + } } } - builder.attribute("DP", depth); + if ( sawDP ) + builder.attribute("DP", depth); } private void randomlyAddVariant(int rank, VariantContext vc) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index e25d65465..7a865ddeb 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -154,6 +154,32 @@ public class SelectVariantsIntegrationTest extends WalkerTest { executeTest("testRegenotype--" + testFile, spec); } + @Test + public void testRemoveMLE() { + String testFile = privateTestDir + "vcfexample.withMLE.vcf"; + + WalkerTestSpec spec = new WalkerTestSpec( + "-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header", + 1, + Arrays.asList("") + ); + + executeTest("testRegenotype--" + testFile, spec); + } + + @Test + public void testRemoveMLEAndRegenotype() { + String testFile = privateTestDir + "vcfexample.withMLE.vcf"; + + WalkerTestSpec spec = new WalkerTestSpec( + "-T SelectVariants -R " + b36KGReference + " -regenotype -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header", + 1, + Arrays.asList("") + ); + + executeTest("testRegenotype--" + testFile, spec); + } + @Test public void testMultipleRecordsAtOnePosition() { String testFile = privateTestDir + "selectVariants.onePosition.vcf"; From 8e3774fb0e2aaa1c3b62cfda027fa1621ba0b4e8 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 14 Aug 2012 14:21:42 -0400 Subject: [PATCH 116/176] Fixing behavior of the --regenotype argument in SelectVariants to properly run in GenotypeGivenAlleles mode. Added integration tests to cover recent SV changes. --- .../genotyper/UnifiedGenotyperEngine.java | 10 ++++++---- .../walkers/variantutils/SelectVariants.java | 3 ++- .../SelectVariantsIntegrationTest.java | 16 ++++++++-------- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index f4bd196ae..f15fa9b99 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -646,15 +646,17 @@ public class UnifiedGenotyperEngine { // if we're genotyping given alleles and we have a requested SNP at this position, do SNP if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { - final VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles); - if ( vcInput == null ) + final VariantContext vcInput = getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles); + if ( vcInput == null ) { + models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP")); return models; + } if ( vcInput.isSNP() ) { // ignore SNPs if the user chose INDEL mode only if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("SNP") ) models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP")); - } + } else if ( vcInput.isIndel() || vcInput.isMixed() ) { // ignore INDELs if the user chose SNP mode only if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("INDEL") ) @@ -759,7 +761,7 @@ public class UnifiedGenotyperEngine { public static VariantContext getVCFromAllelesRod(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc, boolean requireSNP, Logger logger, final RodBinding allelesBinding) { if ( tracker == null || ref == null || logger == null ) - throw new ReviewedStingException("Bad arguments: tracker=" + tracker + " ref=" + ref + " logger=" + logger); + return null; VariantContext vc = null; // search for usable record diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 1493815ee..0810710c1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -470,6 +470,7 @@ public class SelectVariants extends RodWalker implements TreeR final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection(); UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.BOTH; UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES; + UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES; UAC.NO_SLOD = true; UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); headerLines.addAll(UnifiedGenotyper.getHeaderInfo(UAC, null, null)); @@ -567,7 +568,7 @@ public class SelectVariants extends RodWalker implements TreeR VariantContext sub = subsetRecord(vc, EXCLUDE_NON_VARIANTS); if ( REGENOTYPE && sub.isPolymorphicInSamples() && hasPLs(sub) ) { - final VariantContextBuilder builder = new VariantContextBuilder(UG_engine.calculateGenotypes(tracker, ref, context, sub)).filters(sub.getFiltersMaybeNull()); + final VariantContextBuilder builder = new VariantContextBuilder(UG_engine.calculateGenotypes(null, ref, context, sub)).filters(sub.getFiltersMaybeNull()); addAnnotations(builder, sub); sub = builder.make(); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index 7a865ddeb..e172200f7 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -34,7 +34,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(" -sn A -sn B -sn C --variant " + testfile), 1, - Arrays.asList("3d98a024bf3aecbd282843e0af89d0e6") + Arrays.asList("125d1c9fa111cd38dfa2ff3900f16b57") ); executeTest("testRepeatedLineSelection--" + testfile, spec); @@ -49,7 +49,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { + b37hapmapGenotypes + " -disc " + testFile + " -o %s --no_cmdline_in_header -U LENIENT_VCF_PROCESSING", 1, - Arrays.asList("54289033d35d32b8ebbb38c51fbb614c") + Arrays.asList("c0b937edb6a8b6392d477511d4f1ebcf") ); spec.disableShadowBCF(); @@ -135,7 +135,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant:dbsnp " + testFile + " -o %s --no_cmdline_in_header", 1, - Arrays.asList("d12ae1617deb38f5ed712dc326935b9a") + Arrays.asList("a554459c9ccafb9812ff6d8c06c11726") ); executeTest("testUsingDbsnpName--" + testFile, spec); @@ -148,7 +148,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants -R " + b36KGReference + " -regenotype -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header", 1, - Arrays.asList("c22ad8864d9951403672a24c20d6c3c2") + Arrays.asList("52cb2f150559ca1457e9df7ec153dbb452cb2f150559ca1457e9df7ec153dbb4") ); executeTest("testRegenotype--" + testFile, spec); @@ -161,10 +161,10 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header", 1, - Arrays.asList("") + Arrays.asList("a554459c9ccafb9812ff6d8c06c11726") ); - executeTest("testRegenotype--" + testFile, spec); + executeTest("testRemoveMLE--" + testFile, spec); } @Test @@ -174,10 +174,10 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants -R " + b36KGReference + " -regenotype -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header", 1, - Arrays.asList("") + Arrays.asList("52cb2f150559ca1457e9df7ec153dbb4") ); - executeTest("testRegenotype--" + testFile, spec); + executeTest("testRemoveMLEAndRegenotype--" + testFile, spec); } @Test From 87e41c83c56b2f36bd95f8a1736c530d588446b0 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 14 Aug 2012 15:02:30 -0400 Subject: [PATCH 117/176] In AlleleCount stratification, check to make sure the AC (or MLEAC) is valid (i.e. not higher than number of chromosomes) and throw a User Error if it isn't. Added a test for bad AC. --- .../varianteval/stratifications/AlleleCount.java | 4 ++++ .../varianteval/VariantEvalIntegrationTest.java | 15 +++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java index 158f20b61..50c5526e4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java @@ -51,6 +51,10 @@ public class AlleleCount extends VariantStratifier { AC = Math.max(AC, eval.getCalledChrCount(allele)); } + // make sure that the AC isn't invalid + if ( AC > eval.getCalledChrCount() ) + throw new UserException.MalformedVCF(String.format("The AC or MLEAC value (%d) at position %s:%d is larger than the possible called chromosome count (%d)", AC, eval.getChr(), eval.getStart(), eval.getCalledChrCount())); + return Collections.singletonList((Object) AC); } else { return Collections.emptyList(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 94e52c2b9..c92d6d4cf 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -585,6 +585,21 @@ public class VariantEvalIntegrationTest extends WalkerTest { executeTest("testStandardIndelEval", spec); } + @Test + public void testBadACValue() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-eval " + privateTestDir + "vcfexample.withBadAC.vcf", + "-noST -ST AlleleCount", + "-noEV -EV VariantSummary" + ), + 0, + UserException.class); + executeTest("testBadACValue", spec); + } + @Test() public void testIncompatibleEvalAndStrat() { From 9b84fa20bfbe6955c1309776b68b03040b29f505 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Wed, 15 Aug 2012 15:56:54 -0400 Subject: [PATCH 119/176] Fix an issue with the classpath for external builds in build.xml Use "path" instead of "pathconvert" to construct the external.gatk.classpath. This allows the path to evolve as the build progresses, instead of being fixed early on to a value that (in some cases) could be incorrect. --- build.xml | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/build.xml b/build.xml index 135b9dac7..2ca48c51b 100644 --- a/build.xml +++ b/build.xml @@ -70,13 +70,18 @@ + + + + - + + @@ -96,8 +101,6 @@ - - @@ -208,19 +211,11 @@ - - - - - - - - - + - + @@ -432,7 +427,7 @@ - + From f277d7c09e06ca0cc39054d671312afd9a1e8d8d Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 13 Aug 2012 15:59:35 -0400 Subject: [PATCH 121/176] Removing parallelism bottleneck in the GATK -- GenomeLocParser cache was a major performance bottleneck in parallel GATK performance. With 10 thread > 50% of each thread's time was spent blocking on the MasterSequencingDictionary object. Made this a thread local variable. -- Now we can run the GATK with 48 threads efficiently on GSA4! -- Running -nt 1 => 75 minutes (didn't let is run all of the way through so likely would take longer) -- Running -nt 24 => 3.81 minutes --- .../sting/utils/GenomeLocParser.java | 81 +++++++++++-------- 1 file changed, 48 insertions(+), 33 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java index 4f2b5b2eb..77ecd295f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java +++ b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java @@ -43,9 +43,6 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; /** * Factory class for creating GenomeLocs */ -@Invariant({ - "logger != null", - "contigInfo != null"}) public final class GenomeLocParser { private static Logger logger = Logger.getLogger(GenomeLocParser.class); @@ -54,20 +51,39 @@ public final class GenomeLocParser { // Ugly global variable defining the optional ordering of contig elements // // -------------------------------------------------------------------------------------------------------------- - private final MasterSequenceDictionary contigInfo; + + /** + * This single variable holds the underlying SamSequenceDictionary used by the GATK. We assume + * it is thread safe. + */ + final private SAMSequenceDictionary SINGLE_MASTER_SEQUENCE_DICTIONARY; + + /** + * A thread-local caching contig info + */ + private final ThreadLocal contigInfoPerThread = + new ThreadLocal(); + + /** + * @return a caching sequence dictionary appropriate for this thread + */ + private CachingSequenceDictionary getContigInfo() { + if ( contigInfoPerThread.get() == null ) { + // initialize for this thread + logger.debug("Creating thread-local caching sequence dictionary for thread " + Thread.currentThread().getName()); + contigInfoPerThread.set(new CachingSequenceDictionary(SINGLE_MASTER_SEQUENCE_DICTIONARY)); + } + + assert contigInfoPerThread.get() != null; + + return contigInfoPerThread.get(); + } /** * A wrapper class that provides efficient last used caching for the global - * SAMSequenceDictionary underlying all of the GATK engine capabilities + * SAMSequenceDictionary underlying all of the GATK engine capabilities. */ - // todo -- enable when CoFoJa developers identify the problem (likely thread unsafe invariants) -// @Invariant({ -// "dict != null", -// "dict.size() > 0", -// "lastSSR == null || dict.getSequence(lastContig).getSequenceIndex() == lastIndex", -// "lastSSR == null || dict.getSequence(lastContig).getSequenceName() == lastContig", -// "lastSSR == null || dict.getSequence(lastContig) == lastSSR"}) - private final class MasterSequenceDictionary { + private final class CachingSequenceDictionary { final private SAMSequenceDictionary dict; // cache @@ -76,7 +92,7 @@ public final class GenomeLocParser { int lastIndex = -1; @Requires({"dict != null", "dict.size() > 0"}) - public MasterSequenceDictionary(SAMSequenceDictionary dict) { + public CachingSequenceDictionary(SAMSequenceDictionary dict) { this.dict = dict; } @@ -111,7 +127,6 @@ public final class GenomeLocParser { return lastSSR; else return updateCache(null, index); - } @Requires("contig != null") @@ -125,12 +140,12 @@ public final class GenomeLocParser { } @Requires({"contig != null", "lastContig != null"}) - private final synchronized boolean isCached(final String contig) { + private synchronized boolean isCached(final String contig) { return lastContig.equals(contig); } @Requires({"lastIndex != -1", "index >= 0"}) - private final synchronized boolean isCached(final int index) { + private synchronized boolean isCached(final int index) { return lastIndex == index; } @@ -144,7 +159,7 @@ public final class GenomeLocParser { */ @Requires("contig != null || index >= 0") @Ensures("result != null") - private final synchronized SAMSequenceRecord updateCache(final String contig, int index ) { + private synchronized SAMSequenceRecord updateCache(final String contig, int index ) { SAMSequenceRecord rec = contig == null ? dict.getSequence(index) : dict.getSequence(contig); if ( rec == null ) { throw new ReviewedStingException("BUG: requested unknown contig=" + contig + " index=" + index); @@ -174,7 +189,7 @@ public final class GenomeLocParser { throw new UserException.CommandLineException("Failed to load reference dictionary"); } - contigInfo = new MasterSequenceDictionary(seqDict); + SINGLE_MASTER_SEQUENCE_DICTIONARY = seqDict; logger.debug(String.format("Prepared reference sequence contig dictionary")); for (SAMSequenceRecord contig : seqDict.getSequences()) { logger.debug(String.format(" %s (%d bp)", contig.getSequenceName(), contig.getSequenceLength())); @@ -188,11 +203,11 @@ public final class GenomeLocParser { * @return True if the contig is valid. False otherwise. */ public final boolean contigIsInDictionary(String contig) { - return contig != null && contigInfo.hasContig(contig); + return contig != null && getContigInfo().hasContig(contig); } public final boolean indexIsInDictionary(final int index) { - return index >= 0 && contigInfo.hasContig(index); + return index >= 0 && getContigInfo().hasContig(index); } @@ -208,7 +223,7 @@ public final class GenomeLocParser { public final SAMSequenceRecord getContigInfo(final String contig) { if ( contig == null || ! contigIsInDictionary(contig) ) throw new UserException.MalformedGenomeLoc(String.format("Contig %s given as location, but this contig isn't present in the Fasta sequence dictionary", contig)); - return contigInfo.getSequence(contig); + return getContigInfo().getSequence(contig); } /** @@ -226,9 +241,9 @@ public final class GenomeLocParser { @Requires("contig != null") protected int getContigIndexWithoutException(final String contig) { - if ( contig == null || ! contigInfo.hasContig(contig) ) + if ( contig == null || ! getContigInfo().hasContig(contig) ) return -1; - return contigInfo.getSequenceIndex(contig); + return getContigInfo().getSequenceIndex(contig); } /** @@ -236,7 +251,7 @@ public final class GenomeLocParser { * @return */ public final SAMSequenceDictionary getContigs() { - return contigInfo.dict; + return getContigInfo().dict; } // -------------------------------------------------------------------------------------------------------------- @@ -291,7 +306,7 @@ public final class GenomeLocParser { * @return true if it's valid, false otherwise. If exceptOnError, then throws a UserException if invalid */ private boolean validateGenomeLoc(String contig, int contigIndex, int start, int stop, boolean mustBeOnReference, boolean exceptOnError) { - if ( ! contigInfo.hasContig(contig) ) + if ( ! getContigInfo().hasContig(contig) ) return vglHelper(exceptOnError, String.format("Unknown contig %s", contig)); if (stop < start) @@ -300,8 +315,8 @@ public final class GenomeLocParser { if (contigIndex < 0) return vglHelper(exceptOnError, String.format("The contig index %d is less than 0", contigIndex)); - if (contigIndex >= contigInfo.getNSequences()) - return vglHelper(exceptOnError, String.format("The contig index %d is greater than the stored sequence count (%d)", contigIndex, contigInfo.getNSequences())); + if (contigIndex >= getContigInfo().getNSequences()) + return vglHelper(exceptOnError, String.format("The contig index %d is greater than the stored sequence count (%d)", contigIndex, getContigInfo().getNSequences())); if ( mustBeOnReference ) { if (start < 1) @@ -310,7 +325,7 @@ public final class GenomeLocParser { if (stop < 1) return vglHelper(exceptOnError, String.format("The stop position %d is less than 1", stop)); - int contigSize = contigInfo.getSequence(contigIndex).getSequenceLength(); + int contigSize = getContigInfo().getSequence(contigIndex).getSequenceLength(); if (start > contigSize || stop > contigSize) return vglHelper(exceptOnError, String.format("The genome loc coordinates %d-%d exceed the contig size (%d)", start, stop, contigSize)); } @@ -558,7 +573,7 @@ public final class GenomeLocParser { @Requires("contigName != null") @Ensures("result != null") public GenomeLoc createOverEntireContig(String contigName) { - SAMSequenceRecord contig = contigInfo.getSequence(contigName); + SAMSequenceRecord contig = getContigInfo().getSequence(contigName); return createGenomeLoc(contigName,contig.getSequenceIndex(),1,contig.getSequenceLength(), true); } @@ -573,7 +588,7 @@ public final class GenomeLocParser { if (GenomeLoc.isUnmapped(loc)) return null; String contigName = loc.getContig(); - SAMSequenceRecord contig = contigInfo.getSequence(contigName); + SAMSequenceRecord contig = getContigInfo().getSequence(contigName); int contigIndex = contig.getSequenceIndex(); int start = loc.getStart() - maxBasePairs; @@ -598,7 +613,7 @@ public final class GenomeLocParser { if (GenomeLoc.isUnmapped(loc)) return loc; final String contigName = loc.getContig(); - final SAMSequenceRecord contig = contigInfo.getSequence(contigName); + final SAMSequenceRecord contig = getContigInfo().getSequence(contigName); final int contigIndex = contig.getSequenceIndex(); final int contigLength = contig.getSequenceLength(); @@ -619,7 +634,7 @@ public final class GenomeLocParser { if (GenomeLoc.isUnmapped(loc)) return null; String contigName = loc.getContig(); - SAMSequenceRecord contig = contigInfo.getSequence(contigName); + SAMSequenceRecord contig = getContigInfo().getSequence(contigName); int contigIndex = contig.getSequenceIndex(); int contigLength = contig.getSequenceLength(); From be3230a1fdc6e526e1f1cd82538568ab8498abc9 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 14 Aug 2012 15:02:45 -0400 Subject: [PATCH 123/176] Initial implementation of ThreadFactory that monitors running / blocking / waiting time of threads it creates -- Created makeCombinations utility function (very useful!). Moved template from VariantContextTestProvider -- UnitTests for basic functionality --- .../org/broadinstitute/sting/utils/Utils.java | 30 ++++ .../StateMonitoringThreadFactory.java | 149 ++++++++++++++++ .../StateMonitoringThreadFactoryUnitTest.java | 161 ++++++++++++++++++ .../VariantContextTestProvider.java | 16 +- 4 files changed, 342 insertions(+), 14 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactory.java create mode 100755 public/java/test/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactoryUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java index 17c145dbf..14d7177a0 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java @@ -732,6 +732,36 @@ public class Utils { } } + /** + * Make all combinations of N size of objects + * + * if objects = [A, B, C] + * if N = 1 => [[A], [B], [C]] + * if N = 2 => [[A, A], [B, A], [C, A], [A, B], [B, B], [C, B], [A, C], [B, C], [C, C]] + * + * @param objects + * @param n + * @param + * @return + */ + public static List> makeCombinations(final List objects, final int n) { + final List> combinations = new ArrayList>(); + + if ( n == 1 ) { + for ( final T o : objects ) + combinations.add(Collections.singletonList(o)); + } else { + final List> sub = makeCombinations(objects, n - 1); + for ( List subI : sub ) { + for ( final T a : objects ) { + combinations.add(Utils.cons(a, subI)); + } + } + } + + return combinations; + } + /** * Convenience function that formats the novelty rate as a %.2f string * diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactory.java b/public/java/src/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactory.java new file mode 100644 index 000000000..1e0988bb7 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactory.java @@ -0,0 +1,149 @@ +/* + * The MIT License + * + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package org.broadinstitute.sting.utils.threading; + +import org.apache.log4j.Logger; + +import java.lang.management.ManagementFactory; +import java.lang.management.ThreadInfo; +import java.lang.management.ThreadMXBean; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.EnumMap; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ThreadFactory; + +/** + * Create threads, collecting statistics about their running state over time + * + * Uses a ThreadMXBean to capture info via ThreadInfo + * + * User: depristo + * Date: 8/14/12 + * Time: 8:47 AM + */ +public class StateMonitoringThreadFactory implements ThreadFactory { + protected static final boolean DEBUG = false; + private static Logger logger = Logger.getLogger(StateMonitoringThreadFactory.class); + public static final List TRACKED_STATES = Arrays.asList(Thread.State.BLOCKED, Thread.State.RUNNABLE, Thread.State.WAITING); + + final int threadsToCreate; + final List threads; + final EnumMap times = new EnumMap(Thread.State.class); + final ThreadMXBean bean; + final CountDownLatch activeThreads; + + public StateMonitoringThreadFactory(final int threadsToCreate) { + if ( threadsToCreate <= 0 ) throw new IllegalArgumentException("threadsToCreate <= 0: " + threadsToCreate); + + this.threadsToCreate = threadsToCreate; + threads = new ArrayList(threadsToCreate); + for ( final Thread.State state : Thread.State.values() ) + times.put(state, 0l); + bean = ManagementFactory.getThreadMXBean(); + bean.setThreadContentionMonitoringEnabled(true); + bean.setThreadCpuTimeEnabled(true); + activeThreads = new CountDownLatch(threadsToCreate); + } + + public synchronized long getStateTime(final Thread.State state) { + return times.get(state); + } + + public synchronized long getTotalTime() { + long total = 0; + for ( final long time : times.values() ) + total += time; + return total; + } + + public synchronized double getStateFraction(final Thread.State state) { + return getStateTime(state) / (1.0 * getTotalTime()); + } + + public int getNThreads() { + return threads.size(); + } + + public void waitForAllThreadsToComplete() throws InterruptedException { + activeThreads.await(); + } + + @Override + public synchronized String toString() { + final StringBuilder b = new StringBuilder(); + + b.append("total ").append(getTotalTime()).append(" "); + for ( final Thread.State state : TRACKED_STATES ) { + b.append(state).append(" ").append(getStateTime(state)).append(" "); + } + + return b.toString(); + } + + @Override + public synchronized Thread newThread(final Runnable runnable) { + if ( threads.size() >= threadsToCreate ) + throw new IllegalStateException("Attempting to create more threads than allowed by constructor argument threadsToCreate " + threadsToCreate); + + final Thread myThread = new TrackingThread(runnable); + threads.add(myThread); + return myThread; + } + + // TODO -- add polling capability + + private synchronized void updateThreadInfo(final Thread thread, final long runtime) { + if ( DEBUG ) logger.warn("UpdateThreadInfo called"); + final ThreadInfo info = bean.getThreadInfo(thread.getId()); + if ( info != null ) { + if ( DEBUG ) logger.warn("Updating thread total runtime " + runtime + " of which blocked " + info.getBlockedTime() + " and waiting " + info.getWaitedTime()); + incTimes(Thread.State.BLOCKED, info.getBlockedTime()); + incTimes(Thread.State.WAITING, info.getWaitedTime()); + incTimes(Thread.State.RUNNABLE, runtime - info.getWaitedTime() - info.getBlockedTime()); + } + } + + private synchronized void incTimes(final Thread.State state, final long by) { + times.put(state, times.get(state) + by); + } + + private class TrackingThread extends Thread { + private TrackingThread(Runnable runnable) { + super(runnable); + } + + @Override + public void run() { + final long startTime = System.currentTimeMillis(); + super.run(); + final long endTime = System.currentTimeMillis(); + if ( DEBUG ) logger.warn(" Countdown " + activeThreads.getCount() + " in thread " + Thread.currentThread().getName()); + updateThreadInfo(this, endTime - startTime); + activeThreads.countDown(); + if ( DEBUG ) logger.warn(" -> Countdown " + activeThreads.getCount() + " in thread " + Thread.currentThread().getName()); + } + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactoryUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactoryUnitTest.java new file mode 100755 index 000000000..6fc852bbf --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactoryUnitTest.java @@ -0,0 +1,161 @@ +/* + * The MIT License + * + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package org.broadinstitute.sting.utils.threading; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.*; + +/** + * Tests for the state monitoring thread factory. + */ +public class StateMonitoringThreadFactoryUnitTest extends BaseTest { + private final static long THREAD_TARGET_DURATION_IN_MILLISECOND = 100; + final static Object GLOBAL_LOCK = new Object(); + + private class StateTest extends TestDataProvider { + private final double TOLERANCE = 0.1; // willing to tolerate a 10% error + + final List statesForThreads; + + public StateTest(final List statesForThreads) { + super(StateTest.class); + this.statesForThreads = statesForThreads; + setName("StateTest " + Utils.join(",", statesForThreads)); + } + + public List getStatesForThreads() { + return statesForThreads; + } + + public int getNStates() { return statesForThreads.size(); } + + public double maxStateFraction(final Thread.State state) { return fraction(state) + TOLERANCE; } + public double minStateFraction(final Thread.State state) { return fraction(state) - TOLERANCE; } + + private double fraction(final Thread.State state) { + return Collections.frequency(statesForThreads, state) / (1.0 * statesForThreads.size()); + } + } + + private static class StateTestThread implements Callable { + private final Thread.State stateToImplement; + + private StateTestThread(final Thread.State stateToImplement) { + this.stateToImplement = stateToImplement; + } + + @Override + public Double call() throws Exception { + switch ( stateToImplement ) { + case RUNNABLE: + // do some work until we get to THREAD_TARGET_DURATION_IN_MILLISECOND + double sum = 0.0; + final long startTime = System.currentTimeMillis(); + for ( int i = 1; System.currentTimeMillis() - startTime < (THREAD_TARGET_DURATION_IN_MILLISECOND - 1); i++ ) { + sum += Math.log10(i); + } + return sum; + case WAITING: + Thread.currentThread().sleep(THREAD_TARGET_DURATION_IN_MILLISECOND); + return 0.0; + case BLOCKED: + if ( StateMonitoringThreadFactory.DEBUG ) logger.warn("Blocking..."); + synchronized (GLOBAL_LOCK) { + if ( StateMonitoringThreadFactory.DEBUG ) logger.warn(" ... done blocking"); + } + return 0.0; + default: + throw new ReviewedStingException("Unexpected thread test state " + stateToImplement); + } + } + } + + @DataProvider(name = "StateTest") + public Object[][] createStateTest() { + for ( final int nThreads : Arrays.asList(1, 2, 3, 4, 5) ) { + for (final List states : Utils.makeCombinations(StateMonitoringThreadFactory.TRACKED_STATES, nThreads) ) { + //if ( Collections.frequency(states, Thread.State.BLOCKED) > 0) + new StateTest(states); + } + } + + return StateTest.getTests(StateTest.class); + } + + @Test(enabled = true, dataProvider = "StateTest") + public void testStateTest(final StateTest test) throws InterruptedException { + // allows us to test blocking + final StateMonitoringThreadFactory factory = new StateMonitoringThreadFactory(test.getNStates()); + final ExecutorService threadPool = Executors.newFixedThreadPool(test.getNStates(), factory); + + logger.warn("Running " + test); + synchronized (GLOBAL_LOCK) { + //logger.warn(" Have lock"); + for ( final Thread.State threadToRunState : test.getStatesForThreads() ) + threadPool.submit(new StateTestThread(threadToRunState)); + + // lock has to be here for the whole running of the threads but end before the sleep so the blocked threads + // can block for their allotted time + threadPool.shutdown(); + Thread.sleep(THREAD_TARGET_DURATION_IN_MILLISECOND); + } + //logger.warn(" Releasing lock"); + threadPool.awaitTermination(10, TimeUnit.SECONDS); + //logger.warn(" done awaiting termination"); + //logger.warn(" waiting for all threads to complete"); + factory.waitForAllThreadsToComplete(); + //logger.warn(" done waiting for threads"); + + // make sure we counted everything properly + final long totalTime = factory.getTotalTime(); + final long minTime = (THREAD_TARGET_DURATION_IN_MILLISECOND - 10) * test.getNStates(); + //logger.warn("Testing total time"); + Assert.assertTrue(totalTime >= minTime, "Factory results not properly accumulated: totalTime = " + totalTime + " < minTime = " + minTime); + + for (final Thread.State state : StateMonitoringThreadFactory.TRACKED_STATES ) { + final double min = test.minStateFraction(state); + final double max = test.maxStateFraction(state); + final double obs = factory.getStateFraction(state); + logger.warn(" Checking " + state + + " min " + String.format("%.2f", min) + + " max " + String.format("%.2f", max) + + " obs " + String.format("%.2f", obs) + + " factor = " + factory); + Assert.assertTrue(obs >= min, "Too little time spent in state " + state + " obs " + obs + " min " + min); + Assert.assertTrue(obs <= max, "Too much time spent in state " + state + " obs " + obs + " max " + min); + } + + Assert.assertEquals(factory.getNThreads(), test.getNStates()); + } +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java index b95e589b7..dd1985be3 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java @@ -888,20 +888,8 @@ public class VariantContextTestProvider { } } - private static final List> makeAllGenotypes(final List alleles, final int highestPloidy) { - final List> combinations = new ArrayList>(); - if ( highestPloidy == 1 ) { - for ( final Allele a : alleles ) - combinations.add(Collections.singletonList(a)); - } else { - final List> sub = makeAllGenotypes(alleles, highestPloidy - 1); - for ( List subI : sub ) { - for ( final Allele a : alleles ) { - combinations.add(Utils.cons(a, subI)); - } - } - } - return combinations; + private static List> makeAllGenotypes(final List alleles, final int highestPloidy) { + return Utils.makeCombinations(alleles, highestPloidy); } public static void assertEquals(final VCFHeader actual, final VCFHeader expected) { From 9459e6203aeb3cdd20e311aac2d8718b2d689106 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 14 Aug 2012 16:27:30 -0400 Subject: [PATCH 124/176] Clean, documented implementation of ThreadFactory that monitors running / blocking / waiting time of threads it creates -- Expanded unit tests -- Support for clean logging of results to logger -- Refactored MyTime into AutoFormattingTime in Utils, out of TraversalEngine, for cleanliness and reuse -- Added docs and contracts to StateMonitoringThreadFactory --- .../gatk/traversals/TraversalEngine.java | 67 +----- .../sting/utils/AutoFormattingTime.java | 53 +++++ .../StateMonitoringThreadFactory.java | 208 +++++++++++++++--- .../sting/utils/threading/package-info.java | 2 +- .../StateMonitoringThreadFactoryUnitTest.java | 36 ++- 5 files changed, 265 insertions(+), 101 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/utils/AutoFormattingTime.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index 4ef255524..2593fc72e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -62,54 +62,6 @@ public abstract class TraversalEngine,Provide } - /** - * Simple utility class that makes it convenient to print unit adjusted times - */ - private static class MyTime { - double t; // in Seconds - int precision; // for format - - public MyTime(double t, int precision) { - this.t = t; - this.precision = precision; - } - - public MyTime(double t) { - this(t, 1); - } - - /** - * Instead of 10000 s, returns 2.8 hours - * @return - */ - public String toString() { - double unitTime = t; - String unit = "s"; - - if ( t > 120 ) { - unitTime = t / 60; // minutes - unit = "m"; - - if ( unitTime > 120 ) { - unitTime /= 60; // hours - unit = "h"; - - if ( unitTime > 100 ) { - unitTime /= 24; // days - unit = "d"; - - if ( unitTime > 20 ) { - unitTime /= 7; // days - unit = "w"; - } - } - } - } - - return String.format("%6."+precision+"f %s", unitTime, unit); - } - } - /** lock object to sure updates to history are consistent across threads */ private static final Object lock = new Object(); LinkedList history = new LinkedList(); @@ -280,20 +232,20 @@ public abstract class TraversalEngine,Provide ProcessingHistory last = updateHistory(loc,cumulativeMetrics); - final MyTime elapsed = new MyTime(last.elapsedSeconds); - final MyTime bpRate = new MyTime(secondsPerMillionBP(last)); - final MyTime unitRate = new MyTime(secondsPerMillionElements(last)); + final AutoFormattingTime elapsed = new AutoFormattingTime(last.elapsedSeconds); + final AutoFormattingTime bpRate = new AutoFormattingTime(secondsPerMillionBP(last)); + final AutoFormattingTime unitRate = new AutoFormattingTime(secondsPerMillionElements(last)); final double fractionGenomeTargetCompleted = calculateFractionGenomeTargetCompleted(last); - final MyTime estTotalRuntime = new MyTime(elapsed.t / fractionGenomeTargetCompleted); - final MyTime timeToCompletion = new MyTime(estTotalRuntime.t - elapsed.t); + final AutoFormattingTime estTotalRuntime = new AutoFormattingTime(elapsed.getTimeInSeconds() / fractionGenomeTargetCompleted); + final AutoFormattingTime timeToCompletion = new AutoFormattingTime(estTotalRuntime.getTimeInSeconds() - elapsed.getTimeInSeconds()); if ( printProgress ) { lastProgressPrintTime = curTime; // dynamically change the update rate so that short running jobs receive frequent updates while longer jobs receive fewer updates - if ( estTotalRuntime.t > TWELVE_HOURS_IN_SECONDS ) + if ( estTotalRuntime.getTimeInSeconds() > TWELVE_HOURS_IN_SECONDS ) PROGRESS_PRINT_FREQUENCY = 60 * 1000; // in milliseconds - else if ( estTotalRuntime.t > TWO_HOURS_IN_SECONDS ) + else if ( estTotalRuntime.getTimeInSeconds() > TWO_HOURS_IN_SECONDS ) PROGRESS_PRINT_FREQUENCY = 30 * 1000; // in milliseconds else PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds @@ -308,8 +260,9 @@ public abstract class TraversalEngine,Provide lastPerformanceLogPrintTime = curTime; synchronized(performanceLogLock) { performanceLog.printf("%.2f\t%d\t%.2e\t%d\t%.2e\t%.2e\t%.2f\t%.2f%n", - elapsed.t, nRecords, unitRate.t, last.bpProcessed, bpRate.t, - fractionGenomeTargetCompleted, estTotalRuntime.t, timeToCompletion.t); + elapsed.getTimeInSeconds(), nRecords, unitRate.getTimeInSeconds(), last.bpProcessed, + bpRate.getTimeInSeconds(), fractionGenomeTargetCompleted, estTotalRuntime.getTimeInSeconds(), + timeToCompletion.getTimeInSeconds()); } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/AutoFormattingTime.java b/public/java/src/org/broadinstitute/sting/utils/AutoFormattingTime.java new file mode 100644 index 000000000..8964c16cb --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/AutoFormattingTime.java @@ -0,0 +1,53 @@ +package org.broadinstitute.sting.utils; + +/** + * Simple utility class that makes it convenient to print unit adjusted times + */ +public class AutoFormattingTime { + double timeInSeconds; // in Seconds + int precision; // for format + + public AutoFormattingTime(double timeInSeconds, int precision) { + this.timeInSeconds = timeInSeconds; + this.precision = precision; + } + + public AutoFormattingTime(double timeInSeconds) { + this(timeInSeconds, 1); + } + + public double getTimeInSeconds() { + return timeInSeconds; + } + + /** + * Instead of 10000 s, returns 2.8 hours + * @return + */ + public String toString() { + double unitTime = timeInSeconds; + String unit = "s"; + + if ( timeInSeconds > 120 ) { + unitTime = timeInSeconds / 60; // minutes + unit = "m"; + + if ( unitTime > 120 ) { + unitTime /= 60; // hours + unit = "h"; + + if ( unitTime > 100 ) { + unitTime /= 24; // days + unit = "d"; + + if ( unitTime > 20 ) { + unitTime /= 7; // days + unit = "w"; + } + } + } + } + + return String.format("%6."+precision+"f %s", unitTime, unit); + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactory.java b/public/java/src/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactory.java index 1e0988bb7..39d5c1497 100644 --- a/public/java/src/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactory.java +++ b/public/java/src/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactory.java @@ -23,7 +23,11 @@ */ package org.broadinstitute.sting.utils.threading; +import com.google.java.contract.Ensures; +import com.google.java.contract.Invariant; import org.apache.log4j.Logger; +import org.apache.log4j.Priority; +import org.broadinstitute.sting.utils.AutoFormattingTime; import java.lang.management.ManagementFactory; import java.lang.management.ThreadInfo; @@ -36,7 +40,7 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.ThreadFactory; /** - * Create threads, collecting statistics about their running state over time + * Create activeThreads, collecting statistics about their running state over time * * Uses a ThreadMXBean to capture info via ThreadInfo * @@ -44,34 +48,91 @@ import java.util.concurrent.ThreadFactory; * Date: 8/14/12 * Time: 8:47 AM */ +@Invariant({ + "activeThreads.size() <= nThreadsToCreate", + "countDownLatch.getCount() <= nThreadsToCreate", + "nThreadsToCreated <= nThreadsToCreate" +}) public class StateMonitoringThreadFactory implements ThreadFactory { protected static final boolean DEBUG = false; private static Logger logger = Logger.getLogger(StateMonitoringThreadFactory.class); public static final List TRACKED_STATES = Arrays.asList(Thread.State.BLOCKED, Thread.State.RUNNABLE, Thread.State.WAITING); - final int threadsToCreate; - final List threads; + // todo -- it would be nice to not have to specify upfront the number of threads. + // todo -- can we dynamically increment countDownLatch? It seems not... + final int nThreadsToCreate; + final List activeThreads; final EnumMap times = new EnumMap(Thread.State.class); + + int nThreadsToCreated = 0; + + /** + * The bean used to get the thread info about blocked and waiting times + */ final ThreadMXBean bean; - final CountDownLatch activeThreads; - public StateMonitoringThreadFactory(final int threadsToCreate) { - if ( threadsToCreate <= 0 ) throw new IllegalArgumentException("threadsToCreate <= 0: " + threadsToCreate); + /** + * Counts down the number of active activeThreads whose runtime info hasn't been incorporated into + * times. Counts down from nThreadsToCreate to 0, at which point any code waiting + * on the final times is freed to run. + */ + final CountDownLatch countDownLatch; - this.threadsToCreate = threadsToCreate; - threads = new ArrayList(threadsToCreate); - for ( final Thread.State state : Thread.State.values() ) - times.put(state, 0l); - bean = ManagementFactory.getThreadMXBean(); - bean.setThreadContentionMonitoringEnabled(true); - bean.setThreadCpuTimeEnabled(true); - activeThreads = new CountDownLatch(threadsToCreate); + /** + * Instead of RUNNABLE we want to print running. This map goes from Thread.State names to human readable ones + */ + final static EnumMap PRETTY_NAMES = new EnumMap(Thread.State.class); + static { + PRETTY_NAMES.put(Thread.State.RUNNABLE, "running"); + PRETTY_NAMES.put(Thread.State.BLOCKED, "blocked"); + PRETTY_NAMES.put(Thread.State.WAITING, "waiting"); } + /** + * Create a new factory generating threads whose runtime and contention + * behavior is tracked in this factory. + * + * @param nThreadsToCreate the number of threads we will create in the factory before it's considered complete + * // TODO -- remove argument when we figure out how to implement this capability + */ + public StateMonitoringThreadFactory(final int nThreadsToCreate) { + if ( nThreadsToCreate <= 0 ) throw new IllegalArgumentException("nThreadsToCreate <= 0: " + nThreadsToCreate); + + this.nThreadsToCreate = nThreadsToCreate; + activeThreads = new ArrayList(nThreadsToCreate); + + // initialize times to 0 + for ( final Thread.State state : Thread.State.values() ) + times.put(state, 0l); + + // get the bean, and start tracking + bean = ManagementFactory.getThreadMXBean(); + if ( bean.isThreadContentionMonitoringSupported() ) + bean.setThreadContentionMonitoringEnabled(true); + else + logger.warn("Thread contention monitoring not supported, we cannot track GATK multi-threaded efficiency"); + //bean.setThreadCpuTimeEnabled(true); + + countDownLatch = new CountDownLatch(nThreadsToCreate); + } + + /** + * Get the time spent in state across all threads created by this factory + * + * @param state on of the TRACKED_STATES + * @return the time in milliseconds + */ + @Ensures({"result >= 0", "TRACKED_STATES.contains(state)"}) public synchronized long getStateTime(final Thread.State state) { return times.get(state); } + /** + * Get the total time spent in all states across all threads created by this factory + * + * @return the time in milliseconds + */ + @Ensures({"result >= 0"}) public synchronized long getTotalTime() { long total = 0; for ( final long time : times.values() ) @@ -79,16 +140,27 @@ public class StateMonitoringThreadFactory implements ThreadFactory { return total; } + /** + * Get the fraction of time spent in state across all threads created by this factory + * + * @return the fraction (0.0-1.0) of time spent in state over all state times of all threads + */ + @Ensures({"result >= 0.0", "result <= 1.0", "TRACKED_STATES.contains(state)"}) public synchronized double getStateFraction(final Thread.State state) { - return getStateTime(state) / (1.0 * getTotalTime()); + return getStateTime(state) / (1.0 * Math.max(getTotalTime(), 1)); } - public int getNThreads() { - return threads.size(); + /** + * How many threads have been created by this factory so far? + * @return + */ + @Ensures("result >= 0") + public int getNThreadsCreated() { + return nThreadsToCreated; } public void waitForAllThreadsToComplete() throws InterruptedException { - activeThreads.await(); + countDownLatch.await(); } @Override @@ -103,33 +175,108 @@ public class StateMonitoringThreadFactory implements ThreadFactory { return b.toString(); } - @Override - public synchronized Thread newThread(final Runnable runnable) { - if ( threads.size() >= threadsToCreate ) - throw new IllegalStateException("Attempting to create more threads than allowed by constructor argument threadsToCreate " + threadsToCreate); + /** + * Print usage information about threads from this factory to logger + * with the INFO priority + * + * @param logger + */ + public synchronized void printUsageInformation(final Logger logger) { + printUsageInformation(logger, Priority.INFO); + } + /** + * Print usage information about threads from this factory to logger + * with the provided priority + * + * @param logger + */ + public synchronized void printUsageInformation(final Logger logger, final Priority priority) { + logger.log(priority, "Number of activeThreads used: " + getNThreadsCreated()); + logger.log(priority, "Total runtime " + new AutoFormattingTime(getTotalTime() / 1000.0)); + for ( final Thread.State state : TRACKED_STATES ) { + logger.log(priority, String.format(" Fraction of time spent %s is %.2f (%s)", + prettyName(state), getStateFraction(state), new AutoFormattingTime(getStateTime(state) / 1000.0))); + } + logger.log(priority, String.format("Efficiency of multi-threading: %.2f%% of time spent doing productive work", + getStateFraction(Thread.State.RUNNABLE) * 100)); + } + + private String prettyName(final Thread.State state) { + return PRETTY_NAMES.get(state); + } + + /** + * Create a new thread from this factory + * + * @param runnable + * @return + */ + @Override + @Ensures({ + "activeThreads.size() > old(activeThreads.size())", + "activeThreads.contains(result)", + "nThreadsToCreated == old(nThreadsToCreated) + 1" + }) + public synchronized Thread newThread(final Runnable runnable) { + if ( activeThreads.size() >= nThreadsToCreate) + throw new IllegalStateException("Attempting to create more activeThreads than allowed by constructor argument nThreadsToCreate " + nThreadsToCreate); + + nThreadsToCreated++; final Thread myThread = new TrackingThread(runnable); - threads.add(myThread); + activeThreads.add(myThread); return myThread; } - // TODO -- add polling capability - - private synchronized void updateThreadInfo(final Thread thread, final long runtime) { + /** + * Update the information about completed thread that ran for runtime in milliseconds + * + * This method updates all of the key timing and tracking information in the factory so that + * thread can be retired. After this call the factory shouldn't have a pointer to the thread any longer + * + * @param thread + * @param runtimeInMilliseconds + */ + @Ensures({ + "activeThreads.size() < old(activeThreads.size())", + "! activeThreads.contains(thread)", + "getTotalTime() >= old(getTotalTime())", + "countDownLatch.getCount() < old(countDownLatch.getCount())" + }) + private synchronized void threadIsDone(final Thread thread, final long runtimeInMilliseconds) { + if ( DEBUG ) logger.warn(" Countdown " + countDownLatch.getCount() + " in thread " + Thread.currentThread().getName()); if ( DEBUG ) logger.warn("UpdateThreadInfo called"); + final ThreadInfo info = bean.getThreadInfo(thread.getId()); if ( info != null ) { - if ( DEBUG ) logger.warn("Updating thread total runtime " + runtime + " of which blocked " + info.getBlockedTime() + " and waiting " + info.getWaitedTime()); + if ( DEBUG ) logger.warn("Updating thread total runtime " + runtimeInMilliseconds + " of which blocked " + info.getBlockedTime() + " and waiting " + info.getWaitedTime()); incTimes(Thread.State.BLOCKED, info.getBlockedTime()); incTimes(Thread.State.WAITING, info.getWaitedTime()); - incTimes(Thread.State.RUNNABLE, runtime - info.getWaitedTime() - info.getBlockedTime()); + incTimes(Thread.State.RUNNABLE, runtimeInMilliseconds - info.getWaitedTime() - info.getBlockedTime()); } + + // remove the thread from the list of active activeThreads + if ( ! activeThreads.remove(thread) ) + throw new IllegalStateException("Thread " + thread + " not in list of active activeThreads"); + + // one less thread is live for those blocking on all activeThreads to be complete + countDownLatch.countDown(); + if ( DEBUG ) logger.warn(" -> Countdown " + countDownLatch.getCount() + " in thread " + Thread.currentThread().getName()); } + /** + * Helper function that increments the times counter by by for state + * + * @param state + * @param by + */ private synchronized void incTimes(final Thread.State state, final long by) { times.put(state, times.get(state) + by); } + /** + * A wrapper around Thread that tracks the runtime of the thread and calls threadIsDone() when complete + */ private class TrackingThread extends Thread { private TrackingThread(Runnable runnable) { super(runnable); @@ -140,10 +287,7 @@ public class StateMonitoringThreadFactory implements ThreadFactory { final long startTime = System.currentTimeMillis(); super.run(); final long endTime = System.currentTimeMillis(); - if ( DEBUG ) logger.warn(" Countdown " + activeThreads.getCount() + " in thread " + Thread.currentThread().getName()); - updateThreadInfo(this, endTime - startTime); - activeThreads.countDown(); - if ( DEBUG ) logger.warn(" -> Countdown " + activeThreads.getCount() + " in thread " + Thread.currentThread().getName()); + threadIsDone(this, endTime - startTime); } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/threading/package-info.java b/public/java/src/org/broadinstitute/sting/utils/threading/package-info.java index dc350920e..d72dad471 100644 --- a/public/java/src/org/broadinstitute/sting/utils/threading/package-info.java +++ b/public/java/src/org/broadinstitute/sting/utils/threading/package-info.java @@ -1,4 +1,4 @@ /** - * Provides tools for managing threads, thread pools, and parallelization in general. + * Provides tools for managing activeThreads, thread pools, and parallelization in general. */ package org.broadinstitute.sting.utils.threading; diff --git a/public/java/test/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactoryUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactoryUnitTest.java index 6fc852bbf..c22b49c23 100755 --- a/public/java/test/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactoryUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactoryUnitTest.java @@ -23,6 +23,7 @@ */ package org.broadinstitute.sting.utils.threading; +import org.apache.log4j.Priority; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -30,7 +31,6 @@ import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -40,6 +40,7 @@ import java.util.concurrent.*; * Tests for the state monitoring thread factory. */ public class StateMonitoringThreadFactoryUnitTest extends BaseTest { + // the duration of the tests -- 100 ms is tolerable given the number of tests we are doing private final static long THREAD_TARGET_DURATION_IN_MILLISECOND = 100; final static Object GLOBAL_LOCK = new Object(); @@ -68,10 +69,16 @@ public class StateMonitoringThreadFactoryUnitTest extends BaseTest { } } + /** + * Test helper threading class that puts the thread into RUNNING, BLOCKED, or WAITING state as + * requested for input argument + */ private static class StateTestThread implements Callable { private final Thread.State stateToImplement; private StateTestThread(final Thread.State stateToImplement) { + if ( ! StateMonitoringThreadFactory.TRACKED_STATES.contains(stateToImplement) ) + throw new IllegalArgumentException("Unexpected state " + stateToImplement); this.stateToImplement = stateToImplement; } @@ -92,6 +99,7 @@ public class StateMonitoringThreadFactoryUnitTest extends BaseTest { case BLOCKED: if ( StateMonitoringThreadFactory.DEBUG ) logger.warn("Blocking..."); synchronized (GLOBAL_LOCK) { + // the GLOBAL_LOCK must be held by the unit test itself for this to properly block if ( StateMonitoringThreadFactory.DEBUG ) logger.warn(" ... done blocking"); } return 0.0; @@ -103,7 +111,7 @@ public class StateMonitoringThreadFactoryUnitTest extends BaseTest { @DataProvider(name = "StateTest") public Object[][] createStateTest() { - for ( final int nThreads : Arrays.asList(1, 2, 3, 4, 5) ) { + for ( final int nThreads : Arrays.asList(1, 2, 3, 4) ) { for (final List states : Utils.makeCombinations(StateMonitoringThreadFactory.TRACKED_STATES, nThreads) ) { //if ( Collections.frequency(states, Thread.State.BLOCKED) > 0) new StateTest(states); @@ -125,7 +133,7 @@ public class StateMonitoringThreadFactoryUnitTest extends BaseTest { for ( final Thread.State threadToRunState : test.getStatesForThreads() ) threadPool.submit(new StateTestThread(threadToRunState)); - // lock has to be here for the whole running of the threads but end before the sleep so the blocked threads + // lock has to be here for the whole running of the activeThreads but end before the sleep so the blocked activeThreads // can block for their allotted time threadPool.shutdown(); Thread.sleep(THREAD_TARGET_DURATION_IN_MILLISECOND); @@ -133,29 +141,35 @@ public class StateMonitoringThreadFactoryUnitTest extends BaseTest { //logger.warn(" Releasing lock"); threadPool.awaitTermination(10, TimeUnit.SECONDS); //logger.warn(" done awaiting termination"); - //logger.warn(" waiting for all threads to complete"); + //logger.warn(" waiting for all activeThreads to complete"); factory.waitForAllThreadsToComplete(); - //logger.warn(" done waiting for threads"); + //logger.warn(" done waiting for activeThreads"); // make sure we counted everything properly final long totalTime = factory.getTotalTime(); final long minTime = (THREAD_TARGET_DURATION_IN_MILLISECOND - 10) * test.getNStates(); + final long maxTime = (THREAD_TARGET_DURATION_IN_MILLISECOND + 10) * test.getNStates(); //logger.warn("Testing total time"); Assert.assertTrue(totalTime >= minTime, "Factory results not properly accumulated: totalTime = " + totalTime + " < minTime = " + minTime); + Assert.assertTrue(totalTime <= maxTime, "Factory results not properly accumulated: totalTime = " + totalTime + " > maxTime = " + maxTime); for (final Thread.State state : StateMonitoringThreadFactory.TRACKED_STATES ) { final double min = test.minStateFraction(state); final double max = test.maxStateFraction(state); final double obs = factory.getStateFraction(state); - logger.warn(" Checking " + state - + " min " + String.format("%.2f", min) - + " max " + String.format("%.2f", max) - + " obs " + String.format("%.2f", obs) - + " factor = " + factory); +// logger.warn(" Checking " + state +// + " min " + String.format("%.2f", min) +// + " max " + String.format("%.2f", max) +// + " obs " + String.format("%.2f", obs) +// + " factor = " + factory); Assert.assertTrue(obs >= min, "Too little time spent in state " + state + " obs " + obs + " min " + min); Assert.assertTrue(obs <= max, "Too much time spent in state " + state + " obs " + obs + " max " + min); } - Assert.assertEquals(factory.getNThreads(), test.getNStates()); + // we actually ran the expected number of activeThreads + Assert.assertEquals(factory.getNThreadsCreated(), test.getNStates()); + + // should be called to ensure we don't format / NPE on output + factory.printUsageInformation(logger, Priority.INFO); } } \ No newline at end of file From bd7ed0d02889e683dd4494d73357d95299fd837d Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 14 Aug 2012 22:08:49 -0400 Subject: [PATCH 126/176] Enable efficient parallel output of BCF2 -- Previous IO stub was hardcoded to write VCF. So when you ran -nt 2 -o my.bcf you actually created intermediate VCF files that were then encoded single threaded as BCF. Now we emit natively per thread BCF, and use the fast mergeInfo code to read BCF -> write BCF. Upcoming optimizations to avoid decoding genotype data unnecessarily will enable us to really quickly process BCF2 in parallel -- VariantContextWriterStub forces BCF output for intermediate files -- Nicer debug log message in BCF2Codec -- Turn off debug logging of BCF2LazyGenotypesDecoder -- BCF2FieldWriterManager now uses .debug not .info, so you won't see all of that field manager debugging info with BCF2 any longer -- VariantContextWriterFactory.isBCFOutput now has version that accepts just a file path, not path + options --- .../storage/VariantContextWriterStorage.java | 46 +++++++++++-------- .../io/stubs/VariantContextWriterStub.java | 4 ++ .../sting/utils/codecs/bcf2/BCF2Codec.java | 2 +- .../codecs/bcf2/BCF2LazyGenotypesDecoder.java | 5 +- .../writer/BCF2FieldWriterManager.java | 2 +- .../writer/VariantContextWriterFactory.java | 10 ++++ 6 files changed, 45 insertions(+), 24 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java index fb05a6b04..161179f84 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java @@ -27,9 +27,10 @@ package org.broadinstitute.sting.gatk.io.storage; import net.sf.samtools.util.BlockCompressedOutputStream; import org.apache.log4j.Logger; import org.broad.tribble.AbstractFeatureReader; +import org.broad.tribble.FeatureCodec; import org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub; +import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -81,6 +82,18 @@ public class VariantContextWriterStorage implements Storage source = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new VCFCodec(), false); + final String targetFilePath = target.file != null ? target.file.getAbsolutePath() : "/dev/stdin"; + logger.debug(String.format("Merging %s into %s",file.getAbsolutePath(),targetFilePath)); + + // use the feature manager to determine the right codec for the tmp file + // that way we don't assume it's a specific type + final FeatureManager.FeatureDescriptor fd = new FeatureManager().getByFiletype(file); + if ( fd == null ) + throw new ReviewedStingException("Unexpectedly couldn't find valid codec for temporary output file " + file); + + final FeatureCodec codec = fd.getCodec(); + final AbstractFeatureReader source = + AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), codec, false); - for ( VariantContext vc : source.iterator() ) { + for ( final VariantContext vc : source.iterator() ) { target.writer.add(vc); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java index 6ed889eb6..bea7172ea 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java @@ -35,6 +35,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.variantcontext.writer.Options; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory; import java.io.File; import java.io.OutputStream; @@ -186,6 +187,9 @@ public class VariantContextWriterStub implements Stub, Var if ( engine.lenientVCFProcessing() ) options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER); if ( indexOnTheFly && ! isCompressed() ) options.add(Options.INDEX_ON_THE_FLY); + if ( getFile() != null && VariantContextWriterFactory.isBCFOutput(getFile()) ) + options.add(Options.FORCE_BCF); + return options.isEmpty() ? EnumSet.noneOf(Options.class) : EnumSet.copyOf(options); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java index 570ca7c1c..67e189d11 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java @@ -149,7 +149,7 @@ public final class BCF2Codec implements FeatureCodec { if ( bcfVersion.getMinorVersion() < MIN_MINOR_VERSION ) error("BCF2Codec can only process BCF2 files with minor version >= " + MIN_MINOR_VERSION + " but this file has minor version " + bcfVersion.getMinorVersion()); - logger.info("BCF version " + bcfVersion); + logger.info("Parsing data stream with BCF version " + bcfVersion); final int headerSizeInBytes = BCF2Type.INT32.read(inputStream); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java index cf34a8b48..513b9fcb5 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java @@ -63,9 +63,8 @@ class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser { @Override public LazyGenotypesContext.LazyData parse(final Object data) { - if ( logger.isDebugEnabled() ) - logger.debug("Decoding BCF genotypes for " + nSamples + " samples with " + nFields + " fields each"); - +// if ( logger.isDebugEnabled() ) +// logger.debug("Decoding BCF genotypes for " + nSamples + " samples with " + nFields + " fields each"); try { // load our byte[] data into the decoder diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java index 219daf315..7b8224568 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java @@ -76,7 +76,7 @@ public class BCF2FieldWriterManager { if ( map.containsKey(field) ) throw new ReviewedStingException("BUG: field " + field + " already seen in VCFHeader while building BCF2 field encoders"); map.put(field, writer); - logger.info(writer); + if ( logger.isDebugEnabled() ) logger.debug(writer); } // ----------------------------------------------------------------- diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VariantContextWriterFactory.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VariantContextWriterFactory.java index f23166a02..035aff7d6 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VariantContextWriterFactory.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VariantContextWriterFactory.java @@ -84,6 +84,16 @@ public class VariantContextWriterFactory { } } + /** + * Should we output a BCF file based solely on the name of the file at location? + * + * @param location + * @return + */ + public static boolean isBCFOutput(final File location) { + return isBCFOutput(location, EnumSet.noneOf(Options.class)); + } + public static boolean isBCFOutput(final File location, final EnumSet options) { return options.contains(Options.FORCE_BCF) || (location != null && location.getName().contains(".bcf")); } From ae4d4482ac7a176191fb3c1711e085f427723687 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 14 Aug 2012 22:10:07 -0400 Subject: [PATCH 127/176] Parallel combine variants! -- CombineVariants is now TreeReducible! -- Integration tests running in parallel all pass except one (will fix) due to incorrect use of db=0 flag on input from old VCF format --- .../sting/gatk/walkers/variantutils/CombineVariants.java | 8 +++++++- .../variantutils/CombineVariantsIntegrationTest.java | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index d7dff030f..d6504e841 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCounts; import org.broadinstitute.sting.utils.SampleUtils; @@ -99,7 +100,7 @@ import java.util.*; */ @DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} ) @Reference(window=@Window(start=-50,stop=50)) -public class CombineVariants extends RodWalker { +public class CombineVariants extends RodWalker implements TreeReducible { /** * The VCF files to merge together * @@ -313,5 +314,10 @@ public class CombineVariants extends RodWalker { return counter + sum; } + @Override + public Integer treeReduce(Integer lhs, Integer rhs) { + return reduce(lhs, rhs); + } + public void onTraversalDone(Integer sum) {} } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index c8551657a..2f28a6dbc 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -50,7 +50,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { private void cvExecuteTest(final String name, final WalkerTestSpec spec) { spec.disableShadowBCF(); - executeTest(name, spec); + executeTestParallel(name, spec); } public void test1InOut(String file, String md5) { From d70fd189003dfedb8319caac5acfb32ed1197c15 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 14 Aug 2012 22:10:42 -0400 Subject: [PATCH 128/176] Minor increase in tolerance to sum of states in UnitTest for StateMonitoringThreadFactory --- .../utils/threading/StateMonitoringThreadFactoryUnitTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactoryUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactoryUnitTest.java index c22b49c23..87074364a 100755 --- a/public/java/test/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactoryUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactoryUnitTest.java @@ -147,8 +147,8 @@ public class StateMonitoringThreadFactoryUnitTest extends BaseTest { // make sure we counted everything properly final long totalTime = factory.getTotalTime(); - final long minTime = (THREAD_TARGET_DURATION_IN_MILLISECOND - 10) * test.getNStates(); - final long maxTime = (THREAD_TARGET_DURATION_IN_MILLISECOND + 10) * test.getNStates(); + final long minTime = (long)(THREAD_TARGET_DURATION_IN_MILLISECOND * 0.5) * test.getNStates(); + final long maxTime = (long)(THREAD_TARGET_DURATION_IN_MILLISECOND * 1.5) * test.getNStates(); //logger.warn("Testing total time"); Assert.assertTrue(totalTime >= minTime, "Factory results not properly accumulated: totalTime = " + totalTime + " < minTime = " + minTime); Assert.assertTrue(totalTime <= maxTime, "Factory results not properly accumulated: totalTime = " + totalTime + " > maxTime = " + maxTime); From dafa7e38851326c2f685f91b3fef98c8ec11b8c6 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 15 Aug 2012 14:26:50 -0400 Subject: [PATCH 130/176] Temporarily disable StateMonitoringThreadTests while I get them reliably working across platforms --- .../utils/threading/StateMonitoringThreadFactoryUnitTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactoryUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactoryUnitTest.java index 87074364a..5a606c50e 100755 --- a/public/java/test/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactoryUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/threading/StateMonitoringThreadFactoryUnitTest.java @@ -112,7 +112,7 @@ public class StateMonitoringThreadFactoryUnitTest extends BaseTest { @DataProvider(name = "StateTest") public Object[][] createStateTest() { for ( final int nThreads : Arrays.asList(1, 2, 3, 4) ) { - for (final List states : Utils.makeCombinations(StateMonitoringThreadFactory.TRACKED_STATES, nThreads) ) { + for (final List states : Utils.makePermutations(StateMonitoringThreadFactory.TRACKED_STATES, nThreads, true) ) { //if ( Collections.frequency(states, Thread.State.BLOCKED) > 0) new StateTest(states); } @@ -121,7 +121,7 @@ public class StateMonitoringThreadFactoryUnitTest extends BaseTest { return StateTest.getTests(StateTest.class); } - @Test(enabled = true, dataProvider = "StateTest") + @Test(enabled = false, dataProvider = "StateTest") public void testStateTest(final StateTest test) throws InterruptedException { // allows us to test blocking final StateMonitoringThreadFactory factory = new StateMonitoringThreadFactory(test.getNStates()); From 669c43031aec418ff1cb550b68c1233ecb8d9835 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 15 Aug 2012 14:36:06 -0400 Subject: [PATCH 131/176] BCF2 optimizations; parallel CombineVariants -- BCF2 now determines whether it can safely write out raw genotype blocks, which is true in the case where the VCF header of the input is a complete, ordered subset of the output header. Added utilities to determine this and extensive unit tests (headerLinesAreOrderedConsistently) -- Cleanup collapseStringList and exploreStringList for new unit tests of BCF2Utils. Fixed bug in edge case that never occurred in practice -- VCFContigHeaderLine now provides its own key (VCFHeader.CONTIG_KEY) directly instead of requiring the user to provide it (and hoping its right) -- More ways to access the data in VCFHeader -- BCF2Writer uses a cache to avoid recomputing unnecessarily whether raw genotype blocks can be emitted directly into the output -- Optimization of fullyDecodeAttributes -- attributes.size() is expensive and unnecessary. We just guess that on average we need ~10 elements for the attribute map -- CombineVariants optimization -- filters are online HashSet but are sorted at the end by creating a TreeSet -- makeCombinations is now makePermutations, and you can request to create the permutations with or without replacement --- .../walkers/variantutils/CombineVariants.java | 6 + .../org/broadinstitute/sting/utils/Utils.java | 13 +- .../sting/utils/codecs/bcf2/BCF2Codec.java | 9 +- .../codecs/bcf2/BCF2LazyGenotypesDecoder.java | 2 +- .../sting/utils/codecs/bcf2/BCF2Utils.java | 65 +++++++- .../utils/codecs/vcf/VCFContigHeaderLine.java | 4 +- .../sting/utils/codecs/vcf/VCFHeader.java | 61 ++++++- .../sting/utils/codecs/vcf/VCFUtils.java | 2 +- .../utils/variantcontext/VariantContext.java | 2 +- .../variantcontext/VariantContextUtils.java | 5 +- .../variantcontext/writer/BCF2Encoder.java | 2 +- .../writer/BCF2FieldEncoder.java | 1 - .../variantcontext/writer/BCF2Writer.java | 47 ++++-- .../utils/codecs/bcf2/BCF2UtilsUnitTest.java | 155 ++++++++++++++++++ .../VariantContextTestProvider.java | 4 +- 15 files changed, 334 insertions(+), 44 deletions(-) create mode 100644 public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2UtilsUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index d6504e841..8dabd49b8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -67,6 +67,12 @@ import java.util.*; * VCF and then run SelectVariants to extract the common records with -select 'set == "Intersection"', as worked out * in the detailed example on the wiki. * + * Note that CombineVariants supports multi-threaded parallelism (8/15/12). This is particularly useful + * when converting from VCF to BCF2, which can be expensive. In this case each thread spends CPU time + * doing the conversion, and the GATK engine is smart enough to merge the partial BCF2 blocks together + * efficiency. However, since this merge runs in only one thread, you can quickly reach diminishing + * returns with the number of parallel threads. -nt 4 works well but -nt 8 may be too much. + * *

Input

*

* One or more variant sets to combine. diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java index 14d7177a0..476098ae6 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java @@ -32,7 +32,6 @@ import net.sf.samtools.util.StringUtil; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; -import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.text.TextFormattingUtils; import java.net.InetAddress; @@ -742,19 +741,23 @@ public class Utils { * @param objects * @param n * @param + * @param withReplacement if false, the resulting permutations will only contain unique objects from objects * @return */ - public static List> makeCombinations(final List objects, final int n) { + public static List> makePermutations(final List objects, final int n, final boolean withReplacement) { final List> combinations = new ArrayList>(); - if ( n == 1 ) { + if ( n <= 0 ) + ; + else if ( n == 1 ) { for ( final T o : objects ) combinations.add(Collections.singletonList(o)); } else { - final List> sub = makeCombinations(objects, n - 1); + final List> sub = makePermutations(objects, n - 1, withReplacement); for ( List subI : sub ) { for ( final T a : objects ) { - combinations.add(Utils.cons(a, subI)); + if ( withReplacement || ! subI.contains(a) ) + combinations.add(Utils.cons(a, subI)); } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java index 67e189d11..ac6348f80 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java @@ -423,9 +423,8 @@ public final class BCF2Codec implements FeatureCodec { final LazyGenotypesContext.LazyParser lazyParser = new BCF2LazyGenotypesDecoder(this, siteInfo.alleles, siteInfo.nSamples, siteInfo.nFormatFields, builders); - LazyGenotypesContext lazy = new LazyGenotypesContext(lazyParser, - new LazyData(siteInfo.nFormatFields, decoder.getRecordBytes()), - header.getNGenotypeSamples()); + final LazyData lazyData = new LazyData(header, siteInfo.nFormatFields, decoder.getRecordBytes()); + final LazyGenotypesContext lazy = new LazyGenotypesContext(lazyParser, lazyData, header.getNGenotypeSamples()); // did we resort the sample names? If so, we need to load the genotype data if ( !header.samplesWereAlreadySorted() ) @@ -436,11 +435,13 @@ public final class BCF2Codec implements FeatureCodec { } public static class LazyData { + final public VCFHeader header; final public int nGenotypeFields; final public byte[] bytes; @Requires({"nGenotypeFields > 0", "bytes != null"}) - public LazyData(final int nGenotypeFields, final byte[] bytes) { + public LazyData(final VCFHeader header, final int nGenotypeFields, final byte[] bytes) { + this.header = header; this.nGenotypeFields = nGenotypeFields; this.bytes = bytes; } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java index 513b9fcb5..46b1fa6c1 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java @@ -39,7 +39,7 @@ import java.util.*; * @author Mark DePristo * @since 5/12 */ -class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser { +public class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser { final protected static Logger logger = Logger.getLogger(BCF2LazyGenotypesDecoder.class); // the essential information for us to use to decode the genotypes data diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java index e6e78d89d..2ac916db1 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java @@ -131,17 +131,21 @@ public final class BCF2Utils { * @param strings size > 1 list of strings * @return */ - @Requires({"strings != null", "strings.size() > 1"}) + @Requires({"strings != null"}) @Ensures("result != null") public static String collapseStringList(final List strings) { - final StringBuilder b = new StringBuilder(); - for ( final String s : strings ) { - if ( s != null ) { - assert s.indexOf(",") == -1; // no commas in individual strings - b.append(",").append(s); + if ( strings.isEmpty() ) return ""; + else if ( strings.size() == 1 ) return strings.get(0); + else { + final StringBuilder b = new StringBuilder(); + for ( final String s : strings ) { + if ( s != null ) { + assert s.indexOf(",") == -1; // no commas in individual strings + b.append(",").append(s); + } } + return b.toString(); } - return b.toString(); } /** @@ -163,7 +167,7 @@ public final class BCF2Utils { @Requires("s != null") public static boolean isCollapsedString(final String s) { - return s.charAt(0) == ','; + return s.length() > 0 && s.charAt(0) == ','; } /** @@ -280,4 +284,49 @@ public final class BCF2Utils { else if ( o instanceof List ) return (List)o; else return Collections.singletonList(o); } + + /** + * Are the elements and their order in the output and input headers consistent so that + * we can write out the raw genotypes block without decoding and recoding it? + * + * If the order of INFO, FILTER, or contrig elements in the output header is different than + * in the input header we must decode the blocks using the input header and then recode them + * based on the new output order. + * + * If they are consistent, we can simply pass through the raw genotypes block bytes, which is + * a *huge* performance win for large blocks. + * + * Many common operations on BCF2 files (merging them for -nt, selecting a subset of records, etc) + * don't modify the ordering of the header fields and so can safely pass through the genotypes + * undecoded. Some operations -- those at add filters or info fields -- can change the ordering + * of the header fields and so produce invalid BCF2 files if the genotypes aren't decoded + */ + public static boolean headerLinesAreOrderedConsistently(final VCFHeader outputHeader, final VCFHeader genotypesBlockHeader) { + // first, we have to have the same samples in the same order + if ( ! nullAsEmpty(outputHeader.getSampleNamesInOrder()).equals(nullAsEmpty(genotypesBlockHeader.getSampleNamesInOrder())) ) + return false; + + final Iterator outputLinesIt = outputHeader.getIDHeaderLines().iterator(); + final Iterator inputLinesIt = genotypesBlockHeader.getIDHeaderLines().iterator(); + + while ( inputLinesIt.hasNext() ) { + if ( ! outputLinesIt.hasNext() ) // missing lines in output + return false; + + final VCFIDHeaderLine outputLine = outputLinesIt.next(); + final VCFIDHeaderLine inputLine = inputLinesIt.next(); + + if ( ! inputLine.getClass().equals(outputLine.getClass()) || ! inputLine.getID().equals(outputLine.getID()) ) + return false; + } + + return true; + } + + private static List nullAsEmpty(List l) { + if ( l == null ) + return Collections.emptyList(); + else + return l; + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFContigHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFContigHeaderLine.java index d5d76cab7..35cc75af2 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFContigHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFContigHeaderLine.java @@ -47,8 +47,8 @@ public class VCFContigHeaderLine extends VCFSimpleHeaderLine { this.contigIndex = contigIndex; } - public VCFContigHeaderLine(final String key, final Map mapping, int contigIndex) { - super(key, mapping, null); + public VCFContigHeaderLine(final Map mapping, int contigIndex) { + super(VCFHeader.CONTIG_KEY, mapping, null); this.contigIndex = contigIndex; } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java index 7a9329583..2663e848f 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java @@ -53,10 +53,10 @@ public class VCFHeader { // the associated meta data private final Set mMetaData = new LinkedHashSet(); - private final Map mInfoMetaData = new HashMap(); - private final Map mFormatMetaData = new HashMap(); - private final Map mFilterMetaData = new HashMap(); - private final Map mOtherMetaData = new HashMap(); + private final Map mInfoMetaData = new LinkedHashMap(); + private final Map mFormatMetaData = new LinkedHashMap(); + private final Map mFilterMetaData = new LinkedHashMap(); + private final Map mOtherMetaData = new LinkedHashMap(); private final List contigMetaData = new ArrayList(); // the list of auxillary tags @@ -101,6 +101,15 @@ public class VCFHeader { loadMetaDataMaps(); } + /** + * Creates a shallow copy of the meta data in VCF header toCopy + * + * @param toCopy + */ + public VCFHeader(final VCFHeader toCopy) { + this(toCopy.mMetaData); + } + /** * create a VCF header, given a list of meta data and auxillary tags * @@ -153,12 +162,39 @@ public class VCFHeader { } /** - * @return all of the VCF header lines of the ##contig form in order, or an empty set if none were present + * @return all of the VCF header lines of the ##contig form in order, or an empty list if none were present */ public List getContigLines() { return Collections.unmodifiableList(contigMetaData); } + + /** + * @return all of the VCF FILTER lines in their original file order, or an empty list if none were present + */ + public List getFilterLines() { + final List filters = new ArrayList(); + for ( VCFHeaderLine line : mMetaData ) { + if ( line instanceof VCFFilterHeaderLine ) { + filters.add((VCFFilterHeaderLine)line); + } + } + return filters; + } + + /** + * @return all of the VCF FILTER lines in their original file order, or an empty list if none were present + */ + public List getIDHeaderLines() { + final List filters = new ArrayList(); + for ( VCFHeaderLine line : mMetaData ) { + if ( line instanceof VCFIDHeaderLine ) { + filters.add((VCFIDHeaderLine)line); + } + } + return filters; + } + /** * check our metadata for a VCF version tag, and throw an exception if the version is out of date * or the version is not present @@ -299,10 +335,16 @@ public class VCFHeader { return HEADER_FIELDS.values().length + (hasGenotypingData() ? mGenotypeSampleNames.size() + 1 : 0); } + /** + * Returns the INFO HeaderLines in their original ordering + */ public Collection getInfoHeaderLines() { return mInfoMetaData.values(); } + /** + * Returns the FORMAT HeaderLines in their original ordering + */ public Collection getFormatHeaderLines() { return mFormatMetaData.values(); } @@ -390,4 +432,13 @@ public class VCFHeader { public HashMap getSampleNameToOffset() { return sampleNameToOffset; } + + @Override + public String toString() { + final StringBuilder b = new StringBuilder(); + b.append("[VCFHeader:"); + for ( final VCFHeaderLine line : mMetaData ) + b.append("\n\t").append(line); + return b.append("\n]").toString(); + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java index 561e8e78d..be87e7306 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java @@ -301,7 +301,7 @@ public class VCFUtils { map.put("ID", contig.getSequenceName()); map.put("length", String.valueOf(contig.getSequenceLength())); if ( assembly != null ) map.put("assembly", assembly); - return new VCFContigHeaderLine(VCFHeader.CONTIG_KEY, map, contig.getSequenceIndex()); + return new VCFContigHeaderLine(map, contig.getSequenceIndex()); } private static String getReferenceAssembly(final String refPath) { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 2211cfe5e..1fe6b8652 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -1351,7 +1351,7 @@ public class VariantContext implements Feature { // to enable tribble integratio private final Map fullyDecodeAttributes(final Map attributes, final VCFHeader header, final boolean lenientDecoding) { - final Map newAttributes = new HashMap(attributes.size()); + final Map newAttributes = new HashMap(10); for ( final Map.Entry attr : attributes.entrySet() ) { final String field = attr.getKey(); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index a8f956413..e571bb4c1 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -504,7 +504,7 @@ public class VariantContextUtils { Byte referenceBaseForIndel = null; final Set alleles = new LinkedHashSet(); - final Set filters = new TreeSet(); + final Set filters = new HashSet(); final Map attributes = new TreeMap(); final Set inconsistentAttributes = new HashSet(); final Set variantSources = new HashSet(); // contains the set of sources we found in our set of VCs that are variant @@ -656,7 +656,8 @@ public class VariantContextUtils { builder.alleles(alleles); builder.genotypes(genotypes); builder.log10PError(log10PError); - builder.filters(filters).attributes(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes); + builder.filters(filters.isEmpty() ? filters : new TreeSet(filters)); + builder.attributes(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes); // Trim the padded bases of all alleles if necessary final VariantContext merged = builder.make(); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java index 01dac7eb6..22acc4787 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java @@ -124,7 +124,7 @@ public final class BCF2Encoder { @Ensures("encodeStream.size() > old(encodeStream.size())") public final void encodeTyped(List v, final BCF2Type type) throws IOException { if ( type == BCF2Type.CHAR && v.size() != 0 ) { - final String s = v.size() > 1 ? BCF2Utils.collapseStringList((List) v) : (String)v.get(0); + final String s = BCF2Utils.collapseStringList((List) v); v = stringToBytes(s); } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java index ddeb4d284..a91eb216d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java @@ -335,7 +335,6 @@ public abstract class BCF2FieldEncoder { else if (value instanceof List) { final List l = (List)value; if ( l.isEmpty() ) return ""; - else if ( l.size() == 1 ) return (String)l.get(0); else return BCF2Utils.collapseStringList(l); } else return (String)value; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java index a080c4e62..e4c64b26b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java @@ -87,14 +87,6 @@ class BCF2Writer extends IndexingVariantContextWriter { public static final int MAJOR_VERSION = 2; public static final int MINOR_VERSION = 1; - /** - * If true, we will write out the undecoded raw bytes for a genotypes block, if it - * is found in the input VC. This can be very dangerous as the genotype encoding - * depends on the exact ordering of the header. - * - * TODO -- enable when the new smart VCF header code is created by Eric Banks - */ - private final static boolean WRITE_UNDECODED_GENOTYPE_BLOCK = false; final protected static Logger logger = Logger.getLogger(BCF2Writer.class); final private static boolean ALLOW_MISSING_CONTIG_LINES = false; @@ -108,6 +100,13 @@ class BCF2Writer extends IndexingVariantContextWriter { private final BCF2Encoder encoder = new BCF2Encoder(); // initialized after the header arrives final BCF2FieldWriterManager fieldManager = new BCF2FieldWriterManager(); + /** + * cached results for whether we can write out raw genotypes data. + */ + private VCFHeader lastVCFHeaderOfUnparsedGenotypes = null; + private boolean canPassOnUnparsedGenotypeDataForLastVCFHeader = false; + + public BCF2Writer(final File location, final OutputStream output, final SAMSequenceDictionary refDict, final boolean enableOnTheFlyIndexing, final boolean doNotWriteGenotypes) { super(writerName(location, output), location, output, refDict, enableOnTheFlyIndexing); this.outputStream = getOutputStream(); @@ -247,13 +246,39 @@ class BCF2Writer extends IndexingVariantContextWriter { return encoder.getRecordBytes(); } + + /** + * Can we safely write on the raw (undecoded) genotypes of an input VC? + * + * The cache depends on the undecoded lazy data header == lastVCFHeaderOfUnparsedGenotypes, in + * which case we return the previous result. If it's not cached, we use the BCF2Util to + * compare the VC header with our header (expensive) and cache it. + * + * @param lazyData + * @return + */ + private boolean canSafelyWriteRawGenotypesBytes(final BCF2Codec.LazyData lazyData) { + if ( lazyData.header != lastVCFHeaderOfUnparsedGenotypes ) { + // result is already cached + canPassOnUnparsedGenotypeDataForLastVCFHeader = BCF2Utils.headerLinesAreOrderedConsistently(this.header,lazyData.header); + lastVCFHeaderOfUnparsedGenotypes = lazyData.header; + } + + return canPassOnUnparsedGenotypeDataForLastVCFHeader; + } + private BCF2Codec.LazyData getLazyData(final VariantContext vc) { if ( vc.getGenotypes().isLazyWithData() ) { - LazyGenotypesContext lgc = (LazyGenotypesContext)vc.getGenotypes(); - if ( WRITE_UNDECODED_GENOTYPE_BLOCK && lgc.getUnparsedGenotypeData() instanceof BCF2Codec.LazyData ) + final LazyGenotypesContext lgc = (LazyGenotypesContext)vc.getGenotypes(); + + if ( lgc.getUnparsedGenotypeData() instanceof BCF2Codec.LazyData && + canSafelyWriteRawGenotypesBytes((BCF2Codec.LazyData) lgc.getUnparsedGenotypeData())) { + //logger.info("Passing on raw BCF2 genotypes data"); return (BCF2Codec.LazyData)lgc.getUnparsedGenotypeData(); - else + } else { + //logger.info("Decoding raw BCF2 genotypes data"); lgc.decode(); // WARNING -- required to avoid keeping around bad lazy data for too long + } } return null; diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2UtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2UtilsUnitTest.java new file mode 100644 index 000000000..ae76a374a --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2UtilsUnitTest.java @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.codecs.bcf2; + +import org.broad.tribble.readers.PositionalBufferedStream; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.codecs.vcf.*; + +import java.io.*; +import java.util.*; +import org.testng.Assert; +import org.testng.annotations.BeforeSuite; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +/** + * Tests for BCF2Utils + */ +public final class BCF2UtilsUnitTest extends BaseTest { + @DataProvider(name = "CollapseExpandTest") + public Object[][] makeCollapseExpandTest() { + List tests = new ArrayList(); + tests.add(new Object[]{Arrays.asList("A"), "A", false}); + tests.add(new Object[]{Arrays.asList("A", "B"), ",A,B", true}); + tests.add(new Object[]{Arrays.asList("AB"), "AB", false}); + tests.add(new Object[]{Arrays.asList("AB", "C"), ",AB,C", true}); + tests.add(new Object[]{Arrays.asList(), "", false}); + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "CollapseExpandTest") + public void testCollapseExpandTest(final List in, final String expectedCollapsed, final boolean isCollapsed) { + final String actualCollapsed = BCF2Utils.collapseStringList(in); + Assert.assertEquals(actualCollapsed, expectedCollapsed); + Assert.assertEquals(BCF2Utils.isCollapsedString(actualCollapsed), isCollapsed); + if ( isCollapsed ) + Assert.assertEquals(BCF2Utils.explodeStringList(actualCollapsed), in); + } + + @DataProvider(name = "HeaderOrderTestProvider") + public Object[][] makeHeaderOrderTestProvider() { + final List inputLines = new ArrayList(); + final List extraLines = new ArrayList(); + + int counter = 0; + inputLines.add(new VCFFilterHeaderLine(String.valueOf(counter++))); + inputLines.add(new VCFFilterHeaderLine(String.valueOf(counter++))); + inputLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", String.valueOf(counter++)), counter)); + inputLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", String.valueOf(counter++)), counter)); + inputLines.add(new VCFInfoHeaderLine(String.valueOf(counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x")); + inputLines.add(new VCFInfoHeaderLine(String.valueOf(counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x")); + inputLines.add(new VCFFormatHeaderLine(String.valueOf(counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x")); + inputLines.add(new VCFFormatHeaderLine(String.valueOf(counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x")); + final int inputLineCounter = counter; + final VCFHeader inputHeader = new VCFHeader(new LinkedHashSet(inputLines)); + + extraLines.add(new VCFFilterHeaderLine(String.valueOf(counter++))); + extraLines.add(new VCFContigHeaderLine(Collections.singletonMap("ID", String.valueOf(counter++)), counter)); + extraLines.add(new VCFInfoHeaderLine(String.valueOf(counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x")); + extraLines.add(new VCFFormatHeaderLine(String.valueOf(counter++), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x")); + extraLines.add(new VCFHeaderLine("x", "misc")); + extraLines.add(new VCFHeaderLine("y", "misc")); + + List tests = new ArrayList(); + for ( final int extrasToTake : Arrays.asList(0, 1, 2, 3) ) { + final List empty = Collections.emptyList(); + final List> permutations = extrasToTake == 0 + ? Collections.singletonList(empty) + : Utils.makePermutations(extraLines, extrasToTake, false); + for ( final List permutation : permutations ) { + for ( int i = -1; i < inputLines.size(); i++ ) { + final List allLines = new ArrayList(inputLines); + if ( i >= 0 ) + allLines.remove(i); + allLines.addAll(permutation); + final VCFHeader testHeader = new VCFHeader(new LinkedHashSet(allLines)); + final boolean expectedConsistent = expectedConsistent(testHeader, inputLineCounter); + tests.add(new Object[]{inputHeader, testHeader, expectedConsistent}); + } + } + } + + // sample name tests + final List> sampleNameTests = Arrays.asList( + new ArrayList(), + Arrays.asList("A"), + Arrays.asList("A", "B"), + Arrays.asList("A", "B", "C")); + for ( final List inSamples : sampleNameTests ) { + for ( final List testSamples : sampleNameTests ) { + final VCFHeader inputHeaderWithSamples = new VCFHeader(inputHeader.getMetaDataInInputOrder(), inSamples); + + final List> permutations = testSamples.isEmpty() + ? Collections.singletonList(testSamples) + : Utils.makePermutations(testSamples, testSamples.size(), false); + for ( final List testSamplesPermutation : permutations ) { + final VCFHeader testHeaderWithSamples = new VCFHeader(inputHeader.getMetaDataInInputOrder(), testSamplesPermutation); + final boolean expectedConsistent = testSamples.equals(inSamples); + tests.add(new Object[]{inputHeaderWithSamples, testHeaderWithSamples, expectedConsistent}); + } + } + } + + return tests.toArray(new Object[][]{}); + } + + private static boolean expectedConsistent(final VCFHeader combinationHeader, final int minCounterForInputLines) { + final List ids = new ArrayList(); + for ( final VCFHeaderLine line : combinationHeader.getMetaDataInInputOrder() ) { + if ( line instanceof VCFIDHeaderLine ) { + ids.add(Integer.valueOf(((VCFIDHeaderLine) line).getID())); + } + } + + // as long as the start contains all of the ids up to minCounterForInputLines in order + for ( int i = 0; i < minCounterForInputLines; i++ ) + if ( i >= ids.size() || ids.get(i) != i ) + return false; + + return true; + } + + // + // Test to make sure that we detect correctly the case where we can preserve the genotypes data in a BCF2 + // even when the header file is slightly different + // + @Test(dataProvider = "HeaderOrderTestProvider") + public void testHeaderOrder(final VCFHeader inputHeader, final VCFHeader testHeader, final boolean expectedConsistent) { + final boolean actualOrderConsistency = BCF2Utils.headerLinesAreOrderedConsistently(testHeader, inputHeader); + Assert.assertEquals(actualOrderConsistency, expectedConsistent); + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java index dd1985be3..26e2dbfbc 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java @@ -197,7 +197,7 @@ public class VariantContextTestProvider { addHeaderLine(metaData, "FT", 1, VCFHeaderLineType.String); // prep the header - metaData.add(new VCFContigHeaderLine(VCFHeader.CONTIG_KEY, Collections.singletonMap("ID", "1"), 0)); + metaData.add(new VCFContigHeaderLine(Collections.singletonMap("ID", "1"), 0)); metaData.add(new VCFFilterHeaderLine("FILTER1")); metaData.add(new VCFFilterHeaderLine("FILTER2")); @@ -889,7 +889,7 @@ public class VariantContextTestProvider { } private static List> makeAllGenotypes(final List alleles, final int highestPloidy) { - return Utils.makeCombinations(alleles, highestPloidy); + return Utils.makePermutations(alleles, highestPloidy, true); } public static void assertEquals(final VCFHeader actual, final VCFHeader expected) { From c0a31b2e5b6038bac06df1c65e924ec5d6c92361 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 15 Aug 2012 21:12:21 -0400 Subject: [PATCH 132/176] CombineVariants parallel integration tests -- All tests but one (using old bad VCF3 input) run unmodified with parallel code. -- Disabled UNSAFE_VCF_PROCESSING for all but that test, which changes md5s because the output files have fixed headers -- Minor optimizations to simpleMerge --- .../variantcontext/VariantContextUtils.java | 6 +-- .../CombineVariantsIntegrationTest.java | 46 ++++++++++++------- 2 files changed, 32 insertions(+), 20 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index e571bb4c1..5421960b2 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -505,7 +505,7 @@ public class VariantContextUtils { final Set alleles = new LinkedHashSet(); final Set filters = new HashSet(); - final Map attributes = new TreeMap(); + final Map attributes = new LinkedHashMap(); final Set inconsistentAttributes = new HashSet(); final Set variantSources = new HashSet(); // contains the set of sources we found in our set of VCs that are variant final Set rsIDs = new LinkedHashSet(1); // most of the time there's one id @@ -513,7 +513,7 @@ public class VariantContextUtils { GenomeLoc loc = getLocation(genomeLocParser,first); int depth = 0; int maxAC = -1; - final Map attributesWithMaxAC = new TreeMap(); + final Map attributesWithMaxAC = new LinkedHashMap(); double log10PError = 1; VariantContext vcWithMaxAC = null; GenotypesContext genotypes = GenotypesContext.create(); @@ -657,7 +657,7 @@ public class VariantContextUtils { builder.genotypes(genotypes); builder.log10PError(log10PError); builder.filters(filters.isEmpty() ? filters : new TreeSet(filters)); - builder.attributes(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes); + builder.attributes(new TreeMap(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes)); // Trim the padded bases of all alleles if necessary final VariantContext merged = builder.make(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index 2f28a6dbc..9ea751b72 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -45,12 +45,16 @@ public class CombineVariantsIntegrationTest extends WalkerTest { // TODO TODO TODO TODO TODO TODO TODO TODO // private static String baseTestString(String args) { - return "-T CombineVariants --no_cmdline_in_header -L 1:1-50,000,000 -o %s -U LENIENT_VCF_PROCESSING -R " + b36KGReference + args; + return "-T CombineVariants --no_cmdline_in_header -L 1:1-50,000,000 -o %s -R " + b36KGReference + args; + //return "-T CombineVariants --no_cmdline_in_header -L 1:1-50,000,000 -o %s -U LENIENT_VCF_PROCESSING -R " + b36KGReference + args; } - private void cvExecuteTest(final String name, final WalkerTestSpec spec) { + private void cvExecuteTest(final String name, final WalkerTestSpec spec, final boolean parallel) { spec.disableShadowBCF(); - executeTestParallel(name, spec); + if ( parallel ) + executeTestParallel(name, spec); + else + executeTest(name, spec); } public void test1InOut(String file, String md5) { @@ -62,15 +66,19 @@ public class CombineVariantsIntegrationTest extends WalkerTest { baseTestString(" -priority v1 -V:v1 " + validationDataLocation + file + args), 1, Arrays.asList(md5)); - cvExecuteTest("testInOut1--" + file, spec); + cvExecuteTest("testInOut1--" + file, spec, true); } public void combine2(String file1, String file2, String args, String md5) { + combine2(file1, file2, args, md5, true); + } + + public void combine2(String file1, String file2, String args, String md5, final boolean parallel) { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(" -priority v1,v2 -V:v1 " + validationDataLocation + file1 + " -V:v2 "+ validationDataLocation + file2 + args), 1, Arrays.asList(md5)); - cvExecuteTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec); + cvExecuteTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec, parallel); } public void combineSites(String args, String md5) { @@ -82,7 +90,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { + " -V:hm3 " + validationDataLocation + file2 + args, 1, Arrays.asList(md5)); - cvExecuteTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec); + cvExecuteTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec, true); } public void combinePLs(String file1, String file2, String md5) { @@ -90,26 +98,29 @@ public class CombineVariantsIntegrationTest extends WalkerTest { "-T CombineVariants --no_cmdline_in_header -o %s -R " + b36KGReference + " -priority v1,v2 -V:v1 " + privateTestDir + file1 + " -V:v2 " + privateTestDir + file2, 1, Arrays.asList(md5)); - cvExecuteTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec); + cvExecuteTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec, true); } - @Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "6469fce8a5cd5a0f77e5ac5d9e9e192b"); } - @Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "a4cedaa83d54e34cafc3ac4b80acf5b4", " -setKey foo"); } - @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "ac58a5fde17661e2a19004ca954d9781", " -setKey null"); } + @Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "6469fce8a5cd5a0f77e5ac5d9e9e192b", " -U LENIENT_VCF_PROCESSING"); } + @Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "a4cedaa83d54e34cafc3ac4b80acf5b4", " -setKey foo -U LENIENT_VCF_PROCESSING"); } + @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "ac58a5fde17661e2a19004ca954d9781", " -setKey null -U LENIENT_VCF_PROCESSING"); } @Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "67a8076e30b4bca0ea5acdc9cd26a4e0"); } // official project VCF files in tabix format @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "909c6dc74eeb5ab86f8e74073eb0c1d6"); } - @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "f0c2cb3e3a6160e1ed0ee2fd9b120f55"); } + @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "381875b3280ba56eef0152e56f64f68d"); } @Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "f0ce3fb83d4ad9ba402d7cb11cd000c3"); } @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "4efdf983918db822e4ac13d911509576"); } // official project VCF files in tabix format @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "848d4408ee953053d2307cefebc6bd6d"); } // official project VCF files in tabix format - @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "4159a0c0d7c15852a3a545e0bea6bbc5"); } + @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "5d04f22ef88ed9226cbd7b4483c5cb23"); } - @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "61d0ded244895234ac727391f29f13a8"); } + @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e54d0dcf14f90d5c8e58b45191dd0219"); } - @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "0b1815c699e71e143ed129bfadaffbcb"); } + @Test public void uniqueSNPs() { + // parallelism must be disabled because the input VCF is malformed (DB=0) and parallelism actually fixes this which breaks the md5s + combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "acc70f33be741b564f7be9aa3f819dd4", true); + } @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "def52bcd3942bbe39cd7ebe845c4f206"); } @Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "5f61145949180bf2a0cd342d8e064860"); } @@ -122,11 +133,12 @@ public class CombineVariantsIntegrationTest extends WalkerTest { " -V:denovoInfo "+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" + " -setKey centerSet" + " -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED" + + " -U LENIENT_VCF_PROCESSING" + " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" + " -genotypeMergeOptions UNIQUIFY -L 1"), 1, Arrays.asList("3039cfff7abee6aa7fbbafec66a1b019")); - cvExecuteTest("threeWayWithRefs", spec); + cvExecuteTest("threeWayWithRefs", spec, true); } // complex examples with filtering, indels, and multiple alleles @@ -139,7 +151,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { + " -V:two " + privateTestDir + file2 + args, 1, Arrays.asList(md5)); - cvExecuteTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec); + cvExecuteTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec, true); } @Test public void complexTestFull() { combineComplexSites("", "9d989053826ffe5bef7c4e05ac51bcca"); } @@ -153,6 +165,6 @@ public class CombineVariantsIntegrationTest extends WalkerTest { "-T CombineVariants --no_cmdline_in_header -L 1:902000-903000 -o %s -R " + b37KGReference + " -V:v1 " + b37dbSNP132, 1, Arrays.asList("aa926eae333208dc1f41fe69dc95d7a6")); - cvExecuteTest("combineDBSNPDuplicateSites:", spec); + cvExecuteTest("combineDBSNPDuplicateSites:", spec, true); } } \ No newline at end of file From 3556c366689b28391df87caf4908fe85a1700540 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 15 Aug 2012 21:12:55 -0400 Subject: [PATCH 133/176] Disable general ploidy integration tests because they are running forever --- ...fiedGenotyperGeneralPloidyIntegrationTest.java | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java index f62b2250e..80c971601 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java @@ -23,6 +23,7 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest { final String NA12891_CALLS = comparisonDataLocation + "Unvalidated/mtDNA/NA12891.snp.vcf"; final String NA12878_WG_CALLS = comparisonDataLocation + "Unvalidated/NA12878/CEUTrio.HiSeq.WGS.b37_decoy.recal.ts_95.snp_indel_combined.vcf"; final String LSV_ALLELES = validationDataLocation + "ALL.chr20_40m_41m.largeScaleValidationSites.vcf"; + private void PC_MT_Test(String bam, String args, String name, String md5) { final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -ignoreLane ", REF, bam, MTINTERVALS, REFSAMPLE_MT_CALLS, REFSAMPLE_NAME) + " --no_cmdline_in_header -o %s"; @@ -44,35 +45,33 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest { executeTest("testPoolCaller:"+name+" args=" + args, spec); } - @Test + @Test(enabled = false) public void testBOTH_GGA_Pools() { PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","0934f72865388999efec64bd9d4a9b93"); } - @Test + @Test(enabled = false) public void testINDEL_GGA_Pools() { PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","126581c72d287722437274d41b6fed7b"); } - @Test + @Test(enabled = false) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","b543aa1c3efedb301e525c1d6c50ed8d"); } - @Test + @Test(enabled = false) public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","55b20557a836bb92688e68f12d7f5dc4"); } - @Test + @Test(enabled = false) public void testMT_SNP_DISCOVERY_sp4() { PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","7eb889e8e07182f4c3d64609591f9459"); } - @Test + @Test(enabled = false) public void testMT_SNP_GGA_sp10() { - PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "db8114877b99b14f7180fdcd24b040a7"); } - } From 9d09230c26ff832f48d0c7ee289971a2ca5d2168 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 15 Aug 2012 21:55:08 -0400 Subject: [PATCH 134/176] Better docs for verbose output of Pileup --- .../org/broadinstitute/sting/gatk/walkers/Pileup.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Pileup.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Pileup.java index 0eb3a628d..2a6ecdb8c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Pileup.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Pileup.java @@ -64,9 +64,17 @@ import java.util.List; */ @DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) public class Pileup extends LocusWalker implements TreeReducible { + + private static final String verboseDelimiter = "@"; // it's ugly to use "@" but it's literally the only usable character not allowed in read names + @Output PrintStream out; + /** + * In addition to the standard pileup output, adds 'verbose' output too. The verbose output contains the number of spanning deletions, + * and for each read in the pileup it has the read name, offset in the base string, read length, and read mapping quality. These per + * read items are delimited with an '@' character. + */ @Argument(fullName="showVerbose",shortName="verbose",doc="Add an extra verbose section to the pileup output") public boolean SHOW_VERBOSE = false; @@ -116,8 +124,6 @@ public class Pileup extends LocusWalker implements TreeReducib return rodString; } - private static final String verboseDelimiter = "@"; // it's ugly to use "@" but it's literally the only usable character not allowed in read names - private static String createVerboseOutput(final ReadBackedPileup pileup) { final StringBuilder sb = new StringBuilder(); boolean isFirst = true; From f368e568db8ec9818b24570bca62114a6be81cc3 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 15 Aug 2012 22:52:56 -0400 Subject: [PATCH 135/176] Implementing support in BaseRecalibrator for SOLiD no call strategies other than throwing an exception. For some reason we never transfered these capabilities into BQSRv2 earlier. --- .../gatk/walkers/bqsr/BaseRecalibrator.java | 2 +- .../sting/utils/recalibration/RecalUtils.java | 85 ++++++++++++------- 2 files changed, 56 insertions(+), 31 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index f69a02002..3f35cf8e8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -227,7 +227,7 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed if (readNotSeen(read)) { read.setTemporaryAttribute(SEEN_ATTRIBUTE, true); RecalUtils.parsePlatformForRead(read, RAC); - if (RecalUtils.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) { + if (!RecalUtils.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) { read.setTemporaryAttribute(SKIP_RECORD_ATTRIBUTE, true); continue; } diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java index a605c4649..2d05877af 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java @@ -524,46 +524,71 @@ public class RecalUtils { /** * Parse through the color space of the read and add a new tag to the SAMRecord that says which bases are - * inconsistent with the color space. If there is no call in the color space, this method returns true meaning + * inconsistent with the color space. If there is a no call in the color space, this method returns false meaning * this read should be skipped * * @param strategy the strategy used for SOLID no calls * @param read The SAMRecord to parse - * @return whether or not this read should be skipped + * @return true if this read is consistent or false if this read should be skipped */ public static boolean isColorSpaceConsistent(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) { - if (ReadUtils.isSOLiDRead(read)) { // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base - if (read.getAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read - final Object attr = read.getAttribute(RecalUtils.COLOR_SPACE_ATTRIBUTE_TAG); - if (attr != null) { - byte[] colorSpace; - if (attr instanceof String) - colorSpace = ((String) attr).getBytes(); - else - throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalUtils.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName())); - - byte[] readBases = read.getReadBases(); // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read - if (read.getReadNegativeStrandFlag()) - readBases = BaseUtils.simpleReverseComplement(read.getReadBases()); - - final byte[] inconsistency = new byte[readBases.length]; - int i; - byte prevBase = colorSpace[0]; // The sentinel - for (i = 0; i < readBases.length; i++) { - final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[i + 1]); - inconsistency[i] = (byte) (thisBase == readBases[i] ? 0 : 1); - prevBase = readBases[i]; - } - read.setAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency); - } - else if (strategy == SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) // if the strategy calls for an exception, throw it - throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias."); + if (!ReadUtils.isSOLiDRead(read)) // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base + return true; + if (read.getAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read + final Object attr = read.getAttribute(RecalUtils.COLOR_SPACE_ATTRIBUTE_TAG); + if (attr != null) { + byte[] colorSpace; + if (attr instanceof String) + colorSpace = ((String) attr).getBytes(); else - return true; // otherwise, just skip the read + throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalUtils.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName())); + + final boolean badColor = hasNoCallInColorSpace(colorSpace); + if (badColor) { + if (strategy == SOLID_NOCALL_STRATEGY.LEAVE_READ_UNRECALIBRATED) { + return false; // can't recalibrate a SOLiD read with no calls in the color space, and the user wants to skip over them + } + else if (strategy == SOLID_NOCALL_STRATEGY.PURGE_READ) { + read.setReadFailsVendorQualityCheckFlag(true); + return false; + } + } + + byte[] readBases = read.getReadBases(); // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read + if (read.getReadNegativeStrandFlag()) + readBases = BaseUtils.simpleReverseComplement(read.getReadBases()); + + final byte[] inconsistency = new byte[readBases.length]; + int i; + byte prevBase = colorSpace[0]; // The sentinel + for (i = 0; i < readBases.length; i++) { + final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[i + 1]); + inconsistency[i] = (byte) (thisBase == readBases[i] ? 0 : 1); + prevBase = readBases[i]; + } + read.setAttribute(RecalUtils.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency); + } + else if (strategy == SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) // if the strategy calls for an exception, throw it + throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias."); + + else + return false; // otherwise, just skip the read + } + + return true; + } + + private static boolean hasNoCallInColorSpace(final byte[] colorSpace) { + final int length = colorSpace.length; + for (int i = 1; i < length; i++) { // skip the sentinal + final byte color = colorSpace[i]; + if (color != (byte) '0' && color != (byte) '1' && color != (byte) '2' && color != (byte) '3') { + return true; // There is a bad color in this SOLiD read } } - return false; + + return false; // There aren't any color no calls in this SOLiD read } /** From fa7605c6436dd2e196bf60690b55f0a4a454f783 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Wed, 15 Aug 2012 22:54:13 -0400 Subject: [PATCH 136/176] Convert external.build.dir and external.dist.dir back to paths The previous push fixed the external classpath issue but broke external builds in a new way by changing the above from paths to properties. This was a mistake, since external builds require absolute, not relative, paths Thanks to akiezun for the bug report and patch --- build.xml | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/build.xml b/build.xml index 2ca48c51b..f681ddafa 100644 --- a/build.xml +++ b/build.xml @@ -70,10 +70,6 @@ - - - - @@ -211,6 +207,14 @@ + + + + + + + + @@ -425,8 +429,8 @@ - - + + @@ -675,9 +679,9 @@ - - - + + + From 9035b554fb87d42e9aa96eccdc74cdcf8858fb0b Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 15 Aug 2012 23:13:24 -0400 Subject: [PATCH 137/176] Adding tests for the --solid_nocall_strategy argument --- .../gatk/walkers/bqsr/BQSRIntegrationTest.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java index 0c212763d..bd75806dd 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java @@ -62,6 +62,7 @@ public class BQSRIntegrationTest extends WalkerTest { {new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "43fcba51264cc98bd8466d21e1b96766")}, {new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "48aaf9ac54b97eac3663882a59354ab2")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "dac04b9e1e1c52af8d3a50c2e550fda9")}, + {new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "90d70542076715a8605a8d4002614b34")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "600652ee49b9ce1ca2d8ee2d8b7c8211")}, {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "26a04f5a28c40750c603cbe8a926d7bd")}, }; @@ -94,6 +95,20 @@ public class BQSRIntegrationTest extends WalkerTest { executeTest("testBQSRFailWithoutDBSNP", spec); } + @Test + public void testBQSRFailWithSolidNoCall() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + " -T BaseRecalibrator" + + " -R " + b36KGReference + + " -I " + privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam" + + " -L 1:50,000-80,000" + + " --no_plots" + + " -o %s", + 1, // just one output file + UserException.class); + executeTest("testBQSRFailWithSolidNoCall", spec); + } + private static class PRTest { final String args; final String md5; From a9a1c499fd7ec9295d488d5c272337dad852703a Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 16 Aug 2012 09:28:03 -0400 Subject: [PATCH 138/176] Update md5 in VariantRecalibrationWalkers test for BCF2 -- only encoding differences --- .../VariantRecalibrationWalkersIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java index d1ecbb0bf..b780bcd00 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java @@ -76,7 +76,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { VRTest bcfTest = new VRTest(privateTestDir + "vqsr.bcf_test.snps.unfiltered.bcf", "a8ce3cd3dccafdf7d580bcce7d660a9a", // tranches - "1cdf8c9ee77d91d1ba7f002573108bad", // recal file + "74c10fc15f9739a938b7138909fbde04", // recal file "62fda105e14b619a1c263855cf56af1d"); // cut VCF @DataProvider(name = "VRBCFTest") From 9dc694b2e9fe4a476636d751297d2bed26e783df Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 16 Aug 2012 10:01:10 -0400 Subject: [PATCH 139/176] Meaningful error message and keeping tmp file when mergeInfo fails -- BCF2 is failing for some reason when merging tmp. files with parallel combine variants. ThreadLocalOutputTracker no longer sets deleteOnExit on the tmp file, as this prevents debugging. And it's unnecessary because each mergeInto was deleting files as appropriate -- MergeInfo in VariantContextWriterStorage only deletes the intermediate output if an error occurs --- .../broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java | 2 +- .../sting/gatk/io/storage/VariantContextWriterStorage.java | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java b/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java index 999deddd1..636787c69 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java @@ -119,7 +119,7 @@ public class ThreadLocalOutputTracker extends OutputTracker { try { tempFile = File.createTempFile( stub.getClass().getName(), null ); - tempFile.deleteOnExit(); + //tempFile.deleteOnExit(); } catch( IOException ex ) { throw new UserException.BadTmpDir("Unable to create temporary file for stub: " + stub.getClass().getName() ); diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java index 161179f84..72f8581dd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java @@ -194,6 +194,9 @@ public class VariantContextWriterStorage implements Storage Date: Thu, 16 Aug 2012 10:53:22 -0400 Subject: [PATCH 140/176] Cleanup BCF2Codec -- Remove FORBID_SYMBOLIC global that is no longer necessary -- all error handling goes via error() function --- .../sting/utils/codecs/bcf2/BCF2Codec.java | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java index ac6348f80..fc0b3c4a9 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java @@ -51,7 +51,6 @@ import java.util.Map; */ public final class BCF2Codec implements FeatureCodec { final protected static Logger logger = Logger.getLogger(BCF2Codec.class); - private final static boolean FORBID_SYMBOLICS = false; private final static int ALLOWED_MAJOR_VERSION = 2; private final static int MIN_MINOR_VERSION = 1; @@ -178,7 +177,7 @@ public final class BCF2Codec implements FeatureCodec { contigNames.add(contig.getID()); } } else { - throw new UserException.MalformedBCF2("Didn't find any contig lines in BCF2 file header"); + error("Didn't find any contig lines in BCF2 file header"); } // create the string dictionary @@ -271,7 +270,7 @@ public final class BCF2Codec implements FeatureCodec { final int nSamples = nFormatSamples & 0x00FFFFF; if ( header.getNGenotypeSamples() != nSamples ) - throw new UserException.MalformedBCF2("GATK currently doesn't support reading BCF2 files with " + + error("GATK currently doesn't support reading BCF2 files with " + "different numbers of samples per record. Saw " + header.getNGenotypeSamples() + " samples in header but have a record with " + nSamples + " samples"); @@ -343,9 +342,6 @@ public final class BCF2Codec implements FeatureCodec { if ( isRef ) ref = alleleBases; alleles.add(allele); - - if ( FORBID_SYMBOLICS && allele.isSymbolic() ) - throw new ReviewedStingException("LIMITATION: GATK BCF2 codec does not yet support symbolic alleles"); } assert ref != null; @@ -496,7 +492,7 @@ public final class BCF2Codec implements FeatureCodec { return gtFieldDecoders.getDecoder(field); } - private final void error(final String message) throws RuntimeException { + private void error(final String message) throws RuntimeException { throw new UserException.MalformedBCF2(String.format("%s, at record %d with position %d:", message, recordNo, pos)); } } From 7a247df922d7cdea7f3348f3b1c3737e92d2df6b Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 16 Aug 2012 10:54:52 -0400 Subject: [PATCH 141/176] Added -bcf argument to VCFWriter output to force BCF regardless of file extension -- Now possible to do -o /dev/stdout -bcf -l DEBUG > tmp.bcf and create a valid BCF2 file -- Cleanup code to make sure extensions easier by moving to a setX model in VariantContextWriterStub --- .../VCFWriterArgumentTypeDescriptor.java | 53 ++++++++++++++----- .../io/stubs/VariantContextWriterStub.java | 45 +++++++++------- 2 files changed, 66 insertions(+), 32 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java index 09766f127..5e1132d45 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java @@ -47,6 +47,7 @@ import java.util.List; public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { public static final String NO_HEADER_ARG_NAME = "no_cmdline_in_header"; public static final String SITES_ONLY_ARG_NAME = "sites_only"; + public static final String FORCE_BCF = "bcf"; public static final HashSet SUPPORTED_ZIPPED_SUFFIXES = new HashSet(); // @@ -96,7 +97,11 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { @Override public List createArgumentDefinitions( ArgumentSource source ) { - return Arrays.asList( createDefaultArgumentDefinition(source), createNoCommandLineHeaderArgumentDefinition(),createSitesOnlyArgumentDefinition()); + return Arrays.asList( + createDefaultArgumentDefinition(source), + createNoCommandLineHeaderArgumentDefinition(), + createSitesOnlyArgumentDefinition(), + createBCFArgumentDefinition() ); } /** @@ -117,7 +122,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) { if(!source.isRequired()) throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default."); - VariantContextWriterStub stub = new VariantContextWriterStub(engine, defaultOutputStream, false, argumentSources, false, false); + VariantContextWriterStub stub = new VariantContextWriterStub(engine, defaultOutputStream, argumentSources); engine.addOutput(stub); return stub; } @@ -141,15 +146,15 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { if(writerFile == null && !source.isRequired()) throw new MissingArgumentValueException(defaultArgumentDefinition); - // Should we compress the output stream? - boolean compress = isCompressed(writerFileName); - - boolean skipWritingCmdLineHeader = argumentIsPresent(createNoCommandLineHeaderArgumentDefinition(),matches); - boolean doNotWriteGenotypes = argumentIsPresent(createSitesOnlyArgumentDefinition(),matches); - // Create a stub for the given object. - VariantContextWriterStub stub = (writerFile != null) ? new VariantContextWriterStub(engine, writerFile, compress, argumentSources, skipWritingCmdLineHeader, doNotWriteGenotypes) - : new VariantContextWriterStub(engine, defaultOutputStream, compress, argumentSources, skipWritingCmdLineHeader, doNotWriteGenotypes); + final VariantContextWriterStub stub = (writerFile != null) + ? new VariantContextWriterStub(engine, writerFile, argumentSources) + : new VariantContextWriterStub(engine, defaultOutputStream, argumentSources); + + stub.setCompressed(isCompressed(writerFileName)); + stub.setDoNotWriteGenotypes(argumentIsPresent(createSitesOnlyArgumentDefinition(),matches)); + stub.setSkipWritingCommandLineHeader(argumentIsPresent(createNoCommandLineHeaderArgumentDefinition(),matches)); + stub.setForceBCF(argumentIsPresent(createBCFArgumentDefinition(),matches)); // WARNING: Side effects required by engine! parsingEngine.addTags(stub,getArgumentTags(matches)); @@ -159,8 +164,8 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { } /** - * Creates the optional compression level argument for the BAM file. - * @return Argument definition for the BAM file itself. Will not be null. + * Creates the optional no_header argument for the VCF file. + * @return Argument definition for the VCF file itself. Will not be null. */ private ArgumentDefinition createNoCommandLineHeaderArgumentDefinition() { return new ArgumentDefinition( ArgumentIOType.ARGUMENT, @@ -179,8 +184,8 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { } /** - * Creates the optional compression level argument for the BAM file. - * @return Argument definition for the BAM file itself. Will not be null. + * Creates the optional sites_only argument definition + * @return Argument definition for the VCF file itself. Will not be null. */ private ArgumentDefinition createSitesOnlyArgumentDefinition() { return new ArgumentDefinition( ArgumentIOType.ARGUMENT, @@ -198,6 +203,26 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { null ); } + /** + * Creates the optional bcf argument definition + * @return Argument definition for the VCF file itself. Will not be null. + */ + private ArgumentDefinition createBCFArgumentDefinition() { + return new ArgumentDefinition( ArgumentIOType.ARGUMENT, + boolean.class, + FORCE_BCF, + FORCE_BCF, + "force BCF output, regardless of the file's extension", + false, + true, + false, + true, + null, + null, + null, + null ); + } + /** * Returns true if the file will be compressed. * @param writerFileName Name of the file diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java index bea7172ea..260a7efda 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java @@ -79,7 +79,7 @@ public class VariantContextWriterStub implements Stub, Var /** * Should we emit a compressed output stream? */ - private final boolean isCompressed; + private boolean isCompressed = false; /** * A hack: push the argument sources into the VCF header so that the VCF header @@ -90,12 +90,17 @@ public class VariantContextWriterStub implements Stub, Var /** * Should the header be written out? A hidden argument. */ - private final boolean skipWritingCommandLineHeader; + private boolean skipWritingCommandLineHeader = false; /** * Should we not write genotypes even when provided? */ - private final boolean doNotWriteGenotypes; + private boolean doNotWriteGenotypes = false; + + /** + * Should we force BCF writing regardless of the file extension? + */ + private boolean forceBCF = false; /** * Connects this stub with an external stream capable of serving the @@ -108,19 +113,13 @@ public class VariantContextWriterStub implements Stub, Var * * @param engine engine. * @param genotypeFile file to (ultimately) create. - * @param isCompressed should we compress the output stream? * @param argumentSources sources. - * @param skipWritingCommandLineHeader skip writing header. - * @param doNotWriteGenotypes do not write genotypes. */ - public VariantContextWriterStub(GenomeAnalysisEngine engine, File genotypeFile, boolean isCompressed, Collection argumentSources, boolean skipWritingCommandLineHeader, boolean doNotWriteGenotypes) { + public VariantContextWriterStub(GenomeAnalysisEngine engine, File genotypeFile, Collection argumentSources) { this.engine = engine; this.genotypeFile = genotypeFile; this.genotypeStream = null; - this.isCompressed = isCompressed; this.argumentSources = argumentSources; - this.skipWritingCommandLineHeader = skipWritingCommandLineHeader; - this.doNotWriteGenotypes = doNotWriteGenotypes; } /** @@ -128,19 +127,13 @@ public class VariantContextWriterStub implements Stub, Var * * @param engine engine. * @param genotypeStream stream to (ultimately) write. - * @param isCompressed should we compress the output stream? * @param argumentSources sources. - * @param skipWritingCommandLineHeader skip writing header. - * @param doNotWriteGenotypes do not write genotypes. */ - public VariantContextWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream, boolean isCompressed, Collection argumentSources, boolean skipWritingCommandLineHeader, boolean doNotWriteGenotypes) { + public VariantContextWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream, Collection argumentSources) { this.engine = engine; this.genotypeFile = null; this.genotypeStream = new PrintStream(genotypeStream); - this.isCompressed = isCompressed; this.argumentSources = argumentSources; - this.skipWritingCommandLineHeader = skipWritingCommandLineHeader; - this.doNotWriteGenotypes = doNotWriteGenotypes; } /** @@ -167,6 +160,22 @@ public class VariantContextWriterStub implements Stub, Var return isCompressed; } + public void setCompressed(boolean compressed) { + isCompressed = compressed; + } + + public void setSkipWritingCommandLineHeader(boolean skipWritingCommandLineHeader) { + this.skipWritingCommandLineHeader = skipWritingCommandLineHeader; + } + + public void setDoNotWriteGenotypes(boolean doNotWriteGenotypes) { + this.doNotWriteGenotypes = doNotWriteGenotypes; + } + + public void setForceBCF(boolean forceBCF) { + this.forceBCF = forceBCF; + } + /** * Gets the master sequence dictionary from the engine associated with this stub * @link GenomeAnalysisEngine.getMasterSequenceDictionary @@ -187,7 +196,7 @@ public class VariantContextWriterStub implements Stub, Var if ( engine.lenientVCFProcessing() ) options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER); if ( indexOnTheFly && ! isCompressed() ) options.add(Options.INDEX_ON_THE_FLY); - if ( getFile() != null && VariantContextWriterFactory.isBCFOutput(getFile()) ) + if ( forceBCF || (getFile() != null && VariantContextWriterFactory.isBCFOutput(getFile())) ) options.add(Options.FORCE_BCF); return options.isEmpty() ? EnumSet.noneOf(Options.class) : EnumSet.copyOf(options); From 52bfe8db8a7e472f2282be89372b0c10021ab10d Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 16 Aug 2012 10:56:07 -0400 Subject: [PATCH 142/176] Make sure the storage writer is closed before running mergeInfo in multi-threaded output management -- It's not clear this is cause of GSA-484 but it will help confirm that it's not the cause --- .../sting/gatk/io/storage/VariantContextWriterStorage.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java index 72f8581dd..0f5290db7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java @@ -61,6 +61,7 @@ public class VariantContextWriterStorage implements Storage Date: Thu, 16 Aug 2012 12:39:54 -0400 Subject: [PATCH 143/176] GSA-485: Remove repairVCFHeader from GATK codebase -- Removed half-a*ssed attempt to automatically repair VCF files with bad headers, which allowed users to provide a replacement header overwriting the file's actually header on the fly. Not a good idea, really. Eric has promised to create a utility that walks through a VCF file and creates a meaningful header field based on the file's contents (if this ever becomes a priority) --- .../sting/gatk/GenomeAnalysisEngine.java | 15 ++--------- .../arguments/GATKArgumentCollection.java | 9 ------- .../gatk/refdata/tracks/FeatureManager.java | 8 ++---- .../gatk/refdata/tracks/RMDTrackBuilder.java | 16 +---------- .../sting/utils/codecs/vcf/VCFCodec.java | 27 ------------------- .../variantcontext/writer/VCFWriter.java | 2 +- .../utils/codecs/vcf/VCFIntegrationTest.java | 7 +---- 7 files changed, 7 insertions(+), 77 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 56fcf0652..55107833d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -849,20 +849,9 @@ public class GenomeAnalysisEngine { SAMSequenceDictionary sequenceDictionary, GenomeLocParser genomeLocParser, ValidationExclusion.TYPE validationExclusionType) { - VCFHeader header = null; - if ( getArguments().repairVCFHeader != null ) { - try { - final PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(getArguments().repairVCFHeader)); - header = (VCFHeader)new VCFCodec().readHeader(pbs).getHeaderValue(); - pbs.close(); - } catch ( IOException e ) { - throw new UserException.CouldNotReadInputFile(getArguments().repairVCFHeader, e); - } - } + final RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser, validationExclusionType); - RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser, header, validationExclusionType); - - List dataSources = new ArrayList(); + final List dataSources = new ArrayList(); for (RMDTriplet fileDescriptor : referenceMetaDataFiles) dataSources.add(new ReferenceOrderedDataSource(fileDescriptor, builder, diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 4c9235b58..06177868a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -384,14 +384,5 @@ public class GATKArgumentCollection { @Hidden public boolean USE_SLOW_GENOTYPES = false; // TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed - - /** - * The file pointed to by this argument must be a VCF file. The GATK will read in just the header of this file - * and then use the INFO, FORMAT, and FILTER field values from this file to repair the header file of any other - * VCF file that GATK reads in. This allows us to have in effect a master set of header records and use these - * to fill in any missing ones in input VCF files. - */ - @Argument(fullName="repairVCFHeader", shortName = "repairVCFHeader", doc="If provided, whenever we read a VCF file we will use the header in this file to repair the header of the input VCF files", required=false) - public File repairVCFHeader = null; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java index b5d5deedb..a2fe94641 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java @@ -85,18 +85,16 @@ public class FeatureManager { private final PluginManager pluginManager; private final Collection featureDescriptors = new TreeSet(); - private final VCFHeader headerForRepairs; private final boolean lenientVCFProcessing; /** * Construct a FeatureManager without a master VCF header */ public FeatureManager() { - this(null, false); + this(false); } - public FeatureManager(final VCFHeader headerForRepairs, final boolean lenientVCFProcessing) { - this.headerForRepairs = headerForRepairs; + public FeatureManager(final boolean lenientVCFProcessing) { this.lenientVCFProcessing = lenientVCFProcessing; pluginManager = new PluginManager(FeatureCodec.class, "Codecs", "Codec"); @@ -255,8 +253,6 @@ public class FeatureManager { ((NameAwareCodec)codex).setName(name); if ( codex instanceof ReferenceDependentFeatureCodec ) ((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser); - if ( codex instanceof VCFCodec ) - ((VCFCodec)codex).setHeaderForRepairs(headerForRepairs); if ( codex instanceof AbstractVCFCodec && lenientVCFProcessing ) ((AbstractVCFCodec)codex).disableOnTheFlyModifications(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java index e183fe169..81fe73075 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java @@ -89,17 +89,15 @@ public class RMDTrackBuilder { // extends PluginManager { * please talk through your approach with the SE team. * @param dict Sequence dictionary to use. * @param genomeLocParser Location parser to use. - * @param headerForRepairs a VCF header that should be used to repair VCF headers. Can be null * @param validationExclusionType Types of validations to exclude, for sequence dictionary verification. */ public RMDTrackBuilder(final SAMSequenceDictionary dict, final GenomeLocParser genomeLocParser, - final VCFHeader headerForRepairs, ValidationExclusion.TYPE validationExclusionType) { this.dict = dict; this.validationExclusionType = validationExclusionType; this.genomeLocParser = genomeLocParser; - this.featureManager = new FeatureManager(headerForRepairs, GenomeAnalysisEngine.lenientVCFProcessing(validationExclusionType)); + this.featureManager = new FeatureManager(GenomeAnalysisEngine.lenientVCFProcessing(validationExclusionType)); } /** @@ -111,18 +109,6 @@ public class RMDTrackBuilder { // extends PluginManager { return featureManager; } - /** - * Same as full constructor but makes one without a header for repairs - * @param dict - * @param genomeLocParser - * @param validationExclusionType - */ - public RMDTrackBuilder(final SAMSequenceDictionary dict, - final GenomeLocParser genomeLocParser, - ValidationExclusion.TYPE validationExclusionType) { - this(dict, genomeLocParser, null, validationExclusionType); - } - /** * create a RMDTrack of the specified type * diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java index da5b18831..4df1efee7 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java @@ -49,13 +49,6 @@ public class VCFCodec extends AbstractVCFCodec { // Our aim is to read in the records and convert to VariantContext as quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters. public final static String VCF4_MAGIC_HEADER = "##fileformat=VCFv4"; - /** - * A VCF header the contains master info/filter/format records that we use to 'fill in' - * any missing records from our input VCF header. This allows us to repair headers on - * the fly - */ - private VCFHeader headerForRepairs = null; - /** * @param reader the line reader to take header lines from * @return the number of header lines @@ -88,8 +81,6 @@ public class VCFCodec extends AbstractVCFCodec { } headerStrings.add(line); super.parseHeaderFromLines(headerStrings, version); - if ( headerForRepairs != null ) - this.header = repairHeader(this.header, headerForRepairs); return this.header; } else { @@ -103,24 +94,6 @@ public class VCFCodec extends AbstractVCFCodec { throw new TribbleException.InvalidHeader("We never saw the required CHROM header line (starting with one #) for the input VCF file"); } - private final VCFHeader repairHeader(final VCFHeader readHeader, final VCFHeader masterHeader) { - final Set lines = VCFUtils.smartMergeHeaders(Arrays.asList(readHeader, masterHeader), log); - return new VCFHeader(lines, readHeader.getGenotypeSamples()); - } - - /** - * Tells this VCFCodec to repair the incoming header files with the information in masterHeader - * - * @param headerForRepairs - */ - public void setHeaderForRepairs(final VCFHeader headerForRepairs) { - if ( headerForRepairs != null ) - log.info("Using master VCF header to repair missing files from incoming VCFs"); - this.headerForRepairs = headerForRepairs; - } - - - /** * parse the filter string, first checking to see if we already have parsed it in a previous attempt * diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java index ea968e153..db74f2263 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java @@ -563,6 +563,6 @@ class VCFWriter extends IndexingVariantContextWriter { + " at " + vc.getChr() + ":" + vc.getStart() + " but this key isn't defined in the VCFHeader. The GATK now requires all VCFs to have" + " complete VCF headers by default. This error can be disabled with the engine argument" - + " -U LENIENT_VCF_PROCESSING or repair the VCF file header using repairVCFHeader"); + + " -U LENIENT_VCF_PROCESSING"); } } diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java index 71fc1d464..b2a4ac2da 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java @@ -92,7 +92,7 @@ public class VCFIntegrationTest extends WalkerTest { // // - // Tests to ensure that -U LENIENT_VCF_PROCESS and header repairs are working + // Tests to ensure that -U LENIENT_VCF_PROCESS // // @@ -106,11 +106,6 @@ public class VCFIntegrationTest extends WalkerTest { runVCFWithoutHeaders("-U LENIENT_VCF_PROCESSING", "6de8cb7457154dd355aa55befb943f88", null, true); } - @Test - public void testPassingOnVCFWithoutHeadersRepairingHeaders() { - runVCFWithoutHeaders("-repairVCFHeader " + privateTestDir + "vcfexample2.justHeader.vcf", "ff61e9cad6653c7f93d82d391f7ecdcb", null, false); - } - private void runVCFWithoutHeaders(final String moreArgs, final String expectedMD5, final Class expectedException, final boolean disableBCF) { final String testVCF = privateTestDir + "vcfexample2.noHeader.vcf"; final String baseCommand = "-R " + b37KGReference From 132cdfd9c16efd506b448f1ceb315b0240393f4b Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 16 Aug 2012 13:00:35 -0400 Subject: [PATCH 144/176] GSA-488: MLEAC > AN error when running variant eval fixed --- .../varianteval/stratifications/AlleleCount.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java index 50c5526e4..00a593768 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java @@ -19,6 +19,8 @@ import java.util.*; * it computes the AC from the genotypes themselves. If no AC can be computed, 0 is used. */ public class AlleleCount extends VariantStratifier { + int nchrom; + @Override public void initialize() { // we can only work with a single eval VCF, and it must have genotypes @@ -26,7 +28,8 @@ public class AlleleCount extends VariantStratifier { throw new UserException.BadArgumentValue("AlleleCount", "AlleleCount stratification only works with a single eval vcf"); // There are 2 x n sample chromosomes for diploids - int nchrom = getVariantEvalWalker().getSampleNamesForEvaluation().size() * 2; + // TODO -- generalize to handle multiple ploidy + nchrom = getVariantEvalWalker().getSampleNamesForEvaluation().size() * 2; if ( nchrom < 2 ) throw new UserException.BadArgumentValue("AlleleCount", "AlleleCount stratification requires an eval vcf with at least one sample"); @@ -52,8 +55,10 @@ public class AlleleCount extends VariantStratifier { } // make sure that the AC isn't invalid - if ( AC > eval.getCalledChrCount() ) - throw new UserException.MalformedVCF(String.format("The AC or MLEAC value (%d) at position %s:%d is larger than the possible called chromosome count (%d)", AC, eval.getChr(), eval.getStart(), eval.getCalledChrCount())); + if ( AC > nchrom ) + throw new UserException.MalformedVCF(String.format("The AC or MLEAC value (%d) at position %s:%d " + + "is larger than the number of chromosomes over all samples (%d)", AC, + eval.getChr(), eval.getStart(), nchrom)); return Collections.singletonList((Object) AC); } else { From 2df04dc48a5ce9f4ecaa08d00fc63b03ce08f23b Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 16 Aug 2012 13:05:17 -0400 Subject: [PATCH 145/176] Fix for performance problem in GGA mode related to previous --regenotype commit. Instead of trying to hack around the determination of the calculation model when it's not needed, just simply overload the calculateGenotypes() method to add one that does simple genotyping. Re-enabling the Pool Caller integration tests. --- ...fiedGenotyperGeneralPloidyIntegrationTest.java | 12 ++++++------ .../walkers/genotyper/UnifiedGenotyperEngine.java | 15 +++++++++++---- .../gatk/walkers/variantutils/SelectVariants.java | 2 +- .../SelectVariantsIntegrationTest.java | 2 +- 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java index 80c971601..6ae34f190 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java @@ -45,32 +45,32 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest { executeTest("testPoolCaller:"+name+" args=" + args, spec); } - @Test(enabled = false) + @Test(enabled = true) public void testBOTH_GGA_Pools() { PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","0934f72865388999efec64bd9d4a9b93"); } - @Test(enabled = false) + @Test(enabled = true) public void testINDEL_GGA_Pools() { PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","126581c72d287722437274d41b6fed7b"); } - @Test(enabled = false) + @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","b543aa1c3efedb301e525c1d6c50ed8d"); } - @Test(enabled = false) + @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","55b20557a836bb92688e68f12d7f5dc4"); } - @Test(enabled = false) + @Test(enabled = true) public void testMT_SNP_DISCOVERY_sp4() { PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","7eb889e8e07182f4c3d64609591f9459"); } - @Test(enabled = false) + @Test(enabled = true) public void testMT_SNP_GGA_sp10() { PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "db8114877b99b14f7180fdcd24b040a7"); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index f15fa9b99..c9656dd00 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -38,7 +38,6 @@ import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -259,6 +258,16 @@ public class UnifiedGenotyperEngine { return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model); } + /** + * Compute genotypes at a given locus. + * + * @param vc the GL-annotated variant context + * @return the VariantCallContext object + */ + public VariantCallContext calculateGenotypes(VariantContext vc) { + return calculateGenotypes(null, null, null, null, vc, GenotypeLikelihoodsCalculationModel.Model.valueOf("SNP"), false); + } + // --------------------------------------------------------------------------------------------------------- // @@ -647,10 +656,8 @@ public class UnifiedGenotyperEngine { // if we're genotyping given alleles and we have a requested SNP at this position, do SNP if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { final VariantContext vcInput = getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles); - if ( vcInput == null ) { - models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP")); + if ( vcInput == null ) return models; - } if ( vcInput.isSNP() ) { // ignore SNPs if the user chose INDEL mode only diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 0810710c1..f775c8dd6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -568,7 +568,7 @@ public class SelectVariants extends RodWalker implements TreeR VariantContext sub = subsetRecord(vc, EXCLUDE_NON_VARIANTS); if ( REGENOTYPE && sub.isPolymorphicInSamples() && hasPLs(sub) ) { - final VariantContextBuilder builder = new VariantContextBuilder(UG_engine.calculateGenotypes(null, ref, context, sub)).filters(sub.getFiltersMaybeNull()); + final VariantContextBuilder builder = new VariantContextBuilder(UG_engine.calculateGenotypes(sub)).filters(sub.getFiltersMaybeNull()); addAnnotations(builder, sub); sub = builder.make(); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index e172200f7..bde597fbe 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -148,7 +148,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants -R " + b36KGReference + " -regenotype -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header", 1, - Arrays.asList("52cb2f150559ca1457e9df7ec153dbb452cb2f150559ca1457e9df7ec153dbb4") + Arrays.asList("52cb2f150559ca1457e9df7ec153dbb4") ); executeTest("testRegenotype--" + testFile, spec); From dac3958461173a38a1bfad03343cd1782b3a3628 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 16 Aug 2012 13:32:44 -0400 Subject: [PATCH 146/176] Killing off some FindBugs 'Usability' issues --- .../genotyper/GeneralPloidyIndelGenotypeLikelihoods.java | 2 -- .../sting/commandline/ArgumentDefinitionGroup.java | 6 ++---- .../sting/gatk/walkers/beagle/BeagleOutputToVCF.java | 4 ---- .../gatk/walkers/genotyper/UnifiedGenotyperEngine.java | 4 ++-- .../sting/gatk/walkers/phasing/PhaseByTransmission.java | 2 +- .../gatk/walkers/variantutils/VariantsToBinaryPed.java | 4 ---- .../src/org/broadinstitute/sting/utils/MannWhitneyU.java | 4 ++-- .../java/src/org/broadinstitute/sting/utils/MathUtils.java | 3 +-- public/java/src/org/broadinstitute/sting/utils/Utils.java | 1 - .../sting/utils/codecs/vcf/VCFHeaderLine.java | 2 +- .../sting/utils/help/GenericDocumentationHandler.java | 2 +- .../utils/recalibration/covariates/CycleCovariate.java | 2 +- 12 files changed, 11 insertions(+), 25 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java index 4f42f820e..34267b9a8 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java @@ -142,8 +142,6 @@ public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotype List numSeenBases = new ArrayList(this.alleles.size()); if (!hasReferenceSampleData) { - final int numHaplotypes = haplotypeMap.size(); - final int readCounts[] = new int[pileup.getNumberOfElements()]; readHaplotypeLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, refContext, eventLength, IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(), readCounts); n = readHaplotypeLikelihoods.length; diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitionGroup.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitionGroup.java index b47677b08..474225e2a 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitionGroup.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitionGroup.java @@ -55,10 +55,8 @@ public class ArgumentDefinitionGroup implements Iterable { * Does the name of this argument group match the name of another? */ public boolean groupNameMatches( ArgumentDefinitionGroup other ) { - if( this.groupName == null && other.groupName == null ) - return true; - if( this.groupName == null && other.groupName != null ) - return false; + if( this.groupName == null ) + return other.groupName == null; return this.groupName.equals(other.groupName); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java index 9eb0e4dda..83b10dd91 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java @@ -30,7 +30,6 @@ import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.GenomeLoc; @@ -142,9 +141,6 @@ public class BeagleOutputToVCF extends RodWalker { hInfo.add(new VCFFilterHeaderLine("BGL_RM_WAS_G", "This 'G' site was set to monomorphic by Beagle")); hInfo.add(new VCFFilterHeaderLine("BGL_RM_WAS_T", "This 'T' site was set to monomorphic by Beagle")); - // Open output file specified by output VCF ROD - final List dataSources = this.getToolkit().getRodDataSources(); - if ( comp.isBound() ) { hInfo.add(new VCFInfoHeaderLine("ACH", 1, VCFHeaderLineType.Integer, "Allele Count from Comparison ROD at this site")); hInfo.add(new VCFInfoHeaderLine("ANH", 1, VCFHeaderLineType.Integer, "Allele Frequency from Comparison ROD at this site")); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index c9656dd00..05e12b43d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -183,7 +183,7 @@ public class UnifiedGenotyperEngine { for ( final GenotypeLikelihoodsCalculationModel.Model model : models ) { final Map stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model); if ( stratifiedContexts == null ) { - results.add(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext) : null); + results.add(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, null, rawContext) : null); } else { final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model); @@ -202,7 +202,7 @@ public class UnifiedGenotyperEngine { final List withAllSamples = new ArrayList(calls.size()); for ( final VariantCallContext call : calls ) { if ( call == null ) - withAllSamples.add(call); + withAllSamples.add(null); else { final VariantContext withoutMissing = VariantContextUtils.addMissingSamples(call, allSamples); withAllSamples.add(new VariantCallContext(withoutMissing, call.confidentlyCalled, call.shouldEmit)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java index bbd4bf92f..0dcafb30a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java @@ -645,7 +645,7 @@ public class PhaseByTransmission extends RodWalker, HashMa bestChildGenotype.clear(); bestChildGenotype.add(childGenotype.getKey()); } - else if(configurationLikelihood == bestConfigurationLikelihood) { + else if(MathUtils.compareDoubles(configurationLikelihood, bestConfigurationLikelihood) == 0) { bestFirstParentGenotype.add(firstParentGenotype.getKey()); bestSecondParentGenotype.add(secondParentGenotype.getKey()); bestChildGenotype.add(childGenotype.getKey()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java index 7e82fc454..14c811b03 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java @@ -8,8 +8,6 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.utils.R.RScriptExecutorException; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -18,7 +16,6 @@ import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.*; @@ -95,7 +92,6 @@ public class VariantsToBinaryPed extends RodWalker { // write to the fam file, the first six columns of the standard ped file // first, load data from the input meta data file Map> metaValues = new HashMap>(); - Set samplesToUse = new HashSet(); logger.debug("Reading in metadata..."); try { if ( metaDataFile.getAbsolutePath().endsWith(".fam") ) { diff --git a/public/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java b/public/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java index ecb381e3f..d1bc75583 100755 --- a/public/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java +++ b/public/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java @@ -199,9 +199,9 @@ public class MannWhitneyU { else if ( z > n ) { return 0.0; } else { if ( z > ((double) n) /2 ) { - return 1.0-1/((double)Arithmetic.factorial(n))*uniformSumHelper(z, (int) Math.floor(z), n, 0); + return 1.0-1/(Arithmetic.factorial(n))*uniformSumHelper(z, (int) Math.floor(z), n, 0); } else { - return 1/((double)Arithmetic.factorial(n))*uniformSumHelper(z, (int) Math.floor(z), n, 0); + return 1/(Arithmetic.factorial(n))*uniformSumHelper(z, (int) Math.floor(z), n, 0); } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index 96704f0b8..7d1561fc5 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -767,7 +767,7 @@ public class MathUtils { for (byte v : vals) { sum += v; } - return (byte) Math.floor(sum / vals.length); + return (byte) (sum / vals.length); } public static double averageDouble(List vals) { @@ -1044,7 +1044,6 @@ public class MathUtils { // the list is assumed *not* to be sorted final Comparable x = list.get(orderStat); - ListIterator iterator = list.listIterator(); ArrayList lessThanX = new ArrayList(); ArrayList equalToX = new ArrayList(); ArrayList greaterThanX = new ArrayList(); diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java index 476098ae6..a5b5eca6a 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java @@ -563,7 +563,6 @@ public class Utils { List t = new ArrayList(c.keySet()); Collections.sort(t); - List l = new ArrayList(); List pairs = new ArrayList(); for ( T k : t ) { pairs.add(k + "=" + c.get(k)); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLine.java index 162c34d80..83e55cb12 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLine.java @@ -53,7 +53,7 @@ public class VCFHeaderLine implements Comparable { */ public VCFHeaderLine(String key, String value) { if ( key == null ) - throw new IllegalArgumentException("VCFHeaderLine: key cannot be null: key = " + key); + throw new IllegalArgumentException("VCFHeaderLine: key cannot be null"); mKey = key; mValue = value; } diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java index 69d2e7c9e..dc0668cea 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java @@ -587,7 +587,7 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { private List> docForEnumArgument(Class enumClass) { ClassDoc doc = this.getDoclet().getClassDocForClass(enumClass); if (doc == null) // || ! doc.isEnum() ) - throw new RuntimeException("Tried to get docs for enum " + enumClass + " but got instead: " + doc); + throw new RuntimeException("Tried to get docs for enum " + enumClass + " but got null instead"); List> bindings = new ArrayList>(); for (final FieldDoc field : doc.fields(false)) { diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java index 4f15419c7..cdf12d284 100755 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java @@ -51,7 +51,7 @@ public class CycleCovariate implements StandardCovariate { private static final int MAXIMUM_CYCLE_VALUE = 1000; private static final int CUSHION_FOR_INDELS = 4; - private static String default_platform = null; + private String default_platform = null; private static final EnumSet DISCRETE_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.ILLUMINA, NGSPlatform.SOLID, NGSPlatform.PACBIO, NGSPlatform.COMPLETE_GENOMICS); private static final EnumSet FLOW_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.LS454, NGSPlatform.ION_TORRENT); From ded0e11b457f5e2aed4df28851c76b3086f28a0c Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 16 Aug 2012 14:00:48 -0400 Subject: [PATCH 147/176] Killing off some FindBugs 'Realiability' issues --- .../sting/alignment/reference/bwt/Bases.java | 8 +++---- .../commandline/ArgumentTypeDescriptor.java | 2 +- .../sting/commandline/ParsingMethod.java | 4 ++-- .../ReadBasedReferenceOrderedView.java | 2 +- .../reference/ReferenceDataSource.java | 3 +-- .../sting/gatk/executive/MicroScheduler.java | 2 +- .../sting/gatk/executive/TreeReducer.java | 4 ++-- .../sting/gatk/phonehome/GATKRunReport.java | 2 +- .../gatk/traversals/TraversalEngine.java | 4 ++-- .../sting/gatk/traversals/TraverseLoci.java | 2 +- .../gatk/traversals/TraverseReadPairs.java | 2 +- .../sting/gatk/traversals/TraverseReads.java | 2 +- .../walkers/annotator/ChromosomeCounts.java | 4 ++-- .../annotator/TandemRepeatAnnotator.java | 4 ++-- .../targets/FindCoveredIntervals.java | 6 ++--- .../gatk/walkers/diffengine/DiffNode.java | 2 +- .../evaluators/ThetaVariantEvaluator.java | 6 ++--- .../walkers/variantutils/SelectVariants.java | 24 ------------------- .../walkers/variantutils/VariantsToTable.java | 2 +- .../broadinstitute/sting/utils/BaseUtils.java | 8 +++---- .../broadinstitute/sting/utils/baq/BAQ.java | 2 +- .../sting/utils/codecs/vcf/VCFHeaderLine.java | 4 ++-- .../sting/utils/pileup/PileupElement.java | 3 ++- .../sting/utils/recalibration/RecalDatum.java | 4 ++-- .../utils/recalibration/RecalDatumNode.java | 2 +- .../utils/sam/GATKSAMReadGroupRecord.java | 2 +- .../sting/utils/sam/ReadUtils.java | 4 ++-- .../utils/variantcontext/GenotypeBuilder.java | 2 +- .../variantcontext/GenotypeLikelihoods.java | 2 +- .../variantcontext/VariantContextUtils.java | 2 +- 30 files changed, 48 insertions(+), 72 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/Bases.java b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/Bases.java index bc0a5b63d..7cd85cfd8 100644 --- a/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/Bases.java +++ b/public/java/src/org/broadinstitute/sting/alignment/reference/bwt/Bases.java @@ -12,10 +12,10 @@ import java.util.*; */ public class Bases implements Iterable { - public static byte A = 'A'; - public static byte C = 'C'; - public static byte G = 'G'; - public static byte T = 'T'; + public static final byte A = 'A'; + public static final byte C = 'C'; + public static final byte G = 'G'; + public static final byte T = 'T'; public static final Bases instance = new Bases(); diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index c201e95f0..dd4a151bf 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -53,7 +53,7 @@ public abstract class ArgumentTypeDescriptor { /** * our log, which we want to capture anything from org.broadinstitute.sting */ - protected static Logger logger = Logger.getLogger(ArgumentTypeDescriptor.class); + protected static final Logger logger = Logger.getLogger(ArgumentTypeDescriptor.class); /** * Fetch the given descriptor from the descriptor repository. diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java index 26af49e12..376b6f210 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java @@ -120,8 +120,8 @@ public abstract class ParsingMethod { */ private static final String TAG_TEXT = "[\\w\\-\\.\\=]*"; - public static ParsingMethod FullNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*--(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)), + public static final ParsingMethod FullNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*--(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)), ArgumentDefinitions.FullNameDefinitionMatcher) {}; - public static ParsingMethod ShortNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*-(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)), + public static final ParsingMethod ShortNameParsingMethod = new ParsingMethod(Pattern.compile(String.format("\\s*-(%1$s)(?:\\:(%2$s(?:,%2$s)*))?\\s*",ARGUMENT_TEXT,TAG_TEXT)), ArgumentDefinitions.ShortNameDefinitionMatcher) {}; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java index 142c8a178..01e24df67 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java @@ -118,7 +118,7 @@ class WindowedData { rec.getAlignmentStart(), stop); states = new ArrayList(); - if (provider != null && provider.getReferenceOrderedData() != null) + if (provider.getReferenceOrderedData() != null) for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData()) states.add(new RMDDataState(dataSource, dataSource.seek(range))); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java index 4ecfe472d..b131e36c1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java @@ -45,7 +45,6 @@ import org.broadinstitute.sting.utils.file.FileSystemInabilityToLockException; import java.io.File; import java.util.ArrayList; import java.util.Collections; -import java.util.LinkedList; import java.util.List; /** @@ -56,7 +55,7 @@ public class ReferenceDataSource { private IndexedFastaSequenceFile reference; /** our log, which we want to capture anything from this class */ - protected static org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(ReferenceDataSource.class); + protected static final org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(ReferenceDataSource.class); /** * Create reference data source from fasta file diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index 508099708..95e39b7c6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -58,7 +58,7 @@ import java.util.Collection; /** Shards and schedules data in manageable chunks. */ public abstract class MicroScheduler implements MicroSchedulerMBean { - protected static Logger logger = Logger.getLogger(MicroScheduler.class); + protected static final Logger logger = Logger.getLogger(MicroScheduler.class); /** * Counts the number of instances of the class that are currently alive. diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java b/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java index 632638f64..390da0cce 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java @@ -66,13 +66,13 @@ public class TreeReducer implements Callable { * @return Result of the reduce. */ public Object call() { - Object result = null; + Object result; final long startTime = System.currentTimeMillis(); try { if( lhs == null ) - result = lhs.get(); + result = null; // todo -- what the hell is this above line? Shouldn't it be the two below? // if( lhs == null ) // throw new IllegalStateException(String.format("Insufficient data on which to reduce; lhs = %s, rhs = %s", lhs, rhs) ); diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java index 4cf5046a2..b60a7845a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java @@ -93,7 +93,7 @@ public class GATKRunReport { /** * our log */ - protected static Logger logger = Logger.getLogger(GATKRunReport.class); + protected static final Logger logger = Logger.getLogger(GATKRunReport.class); @Element(required = false, name = "id") diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index 2593fc72e..abc71e549 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -92,7 +92,7 @@ public abstract class TraversalEngine,Provide GenomeLocSortedSet targetIntervals = null; /** our log, which we want to capture anything from this class */ - protected static Logger logger = Logger.getLogger(TraversalEngine.class); + protected static final Logger logger = Logger.getLogger(TraversalEngine.class); protected GenomeAnalysisEngine engine; @@ -354,7 +354,7 @@ public abstract class TraversalEngine,Provide synchronized(performanceLogLock) { // Ignore multiple calls to reset the same lock. - if(performanceLogFile != null && performanceLogFile.equals(fileName)) + if(performanceLogFile != null && performanceLogFile.equals(file)) return; // Close an existing log diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java index 5c9b83312..a5a6919a2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java @@ -19,7 +19,7 @@ public class TraverseLoci extends TraversalEngine,Locu /** * our log, which we want to capture anything from this class */ - protected static Logger logger = Logger.getLogger(TraversalEngine.class); + protected static final Logger logger = Logger.getLogger(TraversalEngine.class); @Override protected String getTraversalType() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java index dd4402d82..ebaac40af 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java @@ -24,7 +24,7 @@ import java.util.List; public class TraverseReadPairs extends TraversalEngine,ReadShardDataProvider> { /** our log, which we want to capture anything from this class */ - protected static Logger logger = Logger.getLogger(TraverseReadPairs.class); + protected static final Logger logger = Logger.getLogger(TraverseReadPairs.class); @Override protected String getTraversalType() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java index 24b8ac986..cb094d29b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java @@ -51,7 +51,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; */ public class TraverseReads extends TraversalEngine,ReadShardDataProvider> { /** our log, which we want to capture anything from this class */ - protected static Logger logger = Logger.getLogger(TraverseReads.class); + protected static final Logger logger = Logger.getLogger(TraverseReads.class); @Override protected String getTraversalType() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java index 54837baad..6bdd779c0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java @@ -53,8 +53,8 @@ import java.util.*; */ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation { - public static final String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY }; - public static final VCFInfoHeaderLine[] descriptions = { + protected static final String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY }; + protected static final VCFInfoHeaderLine[] descriptions = { VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY), VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY), VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY) }; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java index eced387b3..f220ecbd2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java @@ -66,8 +66,8 @@ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements Standa return map; } - public static final String[] keyNames = {STR_PRESENT, REPEAT_UNIT_KEY,REPEATS_PER_ALLELE_KEY }; - public static final VCFInfoHeaderLine[] descriptions = { + protected static final String[] keyNames = {STR_PRESENT, REPEAT_UNIT_KEY,REPEATS_PER_ALLELE_KEY }; + protected static final VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(STR_PRESENT, 0, VCFHeaderLineType.Flag, "Variant is a short tandem repeat"), new VCFInfoHeaderLine(REPEAT_UNIT_KEY, 1, VCFHeaderLineType.String, "Tandem repeat unit (bases)"), new VCFInfoHeaderLine(REPEATS_PER_ALLELE_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Number of times tandem repeat unit is repeated, for each allele (including reference)") }; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java index 373c8232e..e17c6cdb7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java @@ -78,9 +78,9 @@ public class FindCoveredIntervals extends ActiveRegionWalker { public Long reduce(final GenomeLoc value, Long reduce) { if (value != null) { out.println(value.toString()); - return ++reduce; - } else - return reduce; + reduce++; + } + return reduce; } @Override diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java index 2f48de2d3..7315fe503 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java @@ -224,7 +224,7 @@ public class DiffNode extends DiffValue { // X=(A=A B=B C=(D=D)) String[] parts = tree.split("=", 2); if ( parts.length != 2 ) - throw new ReviewedStingException("Unexpected tree structure: " + tree + " parts=" + parts); + throw new ReviewedStingException("Unexpected tree structure: " + tree); String name = parts[0]; String value = parts[1]; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java index a509294ff..b87a8ee85 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java @@ -41,7 +41,7 @@ public class ThetaVariantEvaluator extends VariantEvaluator { ConcurrentMap alleleCounts = new ConcurrentHashMap(); int numHetsHere = 0; - float numGenosHere = 0; + int numGenosHere = 0; int numIndsHere = 0; for (final Genotype genotype : vc.getGenotypes()) { @@ -68,7 +68,7 @@ public class ThetaVariantEvaluator extends VariantEvaluator { //only if have one called genotype at least this.numSites++; - this.totalHet += numHetsHere / numGenosHere; + this.totalHet += numHetsHere / (double)numGenosHere; //compute based on num sites float harmonicFactor = 0; @@ -79,7 +79,7 @@ public class ThetaVariantEvaluator extends VariantEvaluator { //now compute pairwise mismatches float numPairwise = 0; - float numDiffs = 0; + int numDiffs = 0; for (String allele1 : alleleCounts.keySet()) { int allele1Count = alleleCounts.get(allele1); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index f775c8dd6..bfd9aa52f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -31,7 +31,6 @@ import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgume import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.samples.Sample; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCounts; @@ -311,10 +310,6 @@ public class SelectVariants extends RodWalker implements TreeR private File rsIDFile = null; - @Hidden - @Argument(fullName="outMVFile", shortName="outMVFile", doc="", required=false) - private String outMVFile = null; - @Hidden @Argument(fullName="fullyDecode", doc="If true, the incoming VariantContext will be fully decoded", required=false) private boolean fullyDecode = false; @@ -369,8 +364,6 @@ public class SelectVariants extends RodWalker implements TreeR private int positionToAdd = 0; private RandomVariantStructure [] variantArray; - private PrintStream outMVFileStream = null; - //Random number generator for the genotypes to remove private Random randomGenotypes = new Random(); @@ -528,23 +521,6 @@ public class SelectVariants extends RodWalker implements TreeR if (MENDELIAN_VIOLATIONS && mv.countViolations(this.getSampleDB().getFamilies(samples),vc) < 1) break; - if (outMVFile != null){ - for( String familyId : mv.getViolationFamilies()){ - for(Sample sample : this.getSampleDB().getFamily(familyId)){ - if(sample.getParents().size() > 0){ - outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " + - "childG=%s childGL=%s\n",vc.getChr(), vc.getStart(), - vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getCalledChrCount(vc.getAlternateAllele(0)), - sample.getMaternalID(), sample.getPaternalID(), sample.getID(), - vc.getGenotype(sample.getMaternalID()).toBriefString(), vc.getGenotype(sample.getMaternalID()).getLikelihoods().getAsString(), - vc.getGenotype(sample.getPaternalID()).toBriefString(), vc.getGenotype(sample.getPaternalID()).getLikelihoods().getAsString(), - vc.getGenotype(sample.getID()).toBriefString(),vc.getGenotype(sample.getID()).getLikelihoods().getAsString() ); - - } - } - } - } - if (DISCORDANCE_ONLY) { Collection compVCs = tracker.getValues(discordanceTrack, context.getLocation()); if (!isDiscordant(vc, compVCs)) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index b73a498bc..b9577ca9b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -372,7 +372,7 @@ public class VariantsToTable extends RodWalker { // ---------------------------------------------------------------------------------------------------- public static abstract class Getter { public abstract String get(VariantContext vc); } - public static Map getters = new HashMap(); + public static final Map getters = new HashMap(); static { // #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT diff --git a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java index 0065f9258..13571df78 100644 --- a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java @@ -67,10 +67,10 @@ public class BaseUtils { public static final byte DELETION_INDEX = 4; public static final byte NO_CALL_INDEX = 5; // (this is 'N') - public static int gIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'G'); - public static int cIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'C'); - public static int aIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'A'); - public static int tIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'T'); + public static final int aIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'A'); + public static final int cIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'C'); + public static final int gIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'G'); + public static final int tIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'T'); /// In genetics, a transition is a mutation changing a purine to another purine nucleotide (A <-> G) or // a pyrimidine to another pyrimidine nucleotide (C <-> T). diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java index 439a0d8ed..f37451cba 100644 --- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java +++ b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java @@ -68,7 +68,7 @@ public class BAQ { } // Phred scaled now (changed 1/10/2011) - public static double DEFAULT_GOP = 40; + public static final double DEFAULT_GOP = 40; /* Takes a Phred Scale quality score and returns the error probability. * diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLine.java index 83e55cb12..9b5886c65 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLine.java @@ -38,8 +38,8 @@ import java.util.Map; * A class representing a key=value entry in the VCF header */ public class VCFHeaderLine implements Comparable { - protected static boolean ALLOW_UNBOUND_DESCRIPTIONS = true; - protected static String UNBOUND_DESCRIPTION = "Not provided in original VCF header"; + protected static final boolean ALLOW_UNBOUND_DESCRIPTIONS = true; + protected static final String UNBOUND_DESCRIPTION = "Not provided in original VCF header"; private String mKey = null; private String mValue = null; diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java index e5cd9f4d5..8cba5ec23 100755 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils.pileup; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -220,7 +221,7 @@ public class PileupElement implements Comparable { if (isDeletion() && (offset + 1 >= read.getReadLength()) ) // deletion in the end of the read throw new UserException.MalformedBAM(read, String.format("Adjacent I/D events in read %s -- cigar: %s", read.getReadName(), read.getCigarString())); - representativeCount = (isDeletion()) ? Math.round((read.getReducedCount(offset) + read.getReducedCount(offset + 1)) / 2) : read.getReducedCount(offset); + representativeCount = (isDeletion()) ? MathUtils.fastRound((read.getReducedCount(offset) + read.getReducedCount(offset + 1)) / 2.0) : read.getReducedCount(offset); } return representativeCount; } diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java index 249422c17..8c8815b54 100755 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java @@ -94,8 +94,8 @@ public class RecalDatum { * @param reportedQuality */ public RecalDatum(final long _numObservations, final long _numMismatches, final byte reportedQuality) { - if ( numObservations < 0 ) throw new IllegalArgumentException("numObservations < 0"); - if ( numMismatches < 0 ) throw new IllegalArgumentException("numMismatches < 0"); + if ( _numObservations < 0 ) throw new IllegalArgumentException("numObservations < 0"); + if ( _numMismatches < 0 ) throw new IllegalArgumentException("numMismatches < 0"); if ( reportedQuality < 0 ) throw new IllegalArgumentException("reportedQuality < 0"); numObservations = _numObservations; diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java index 102aa4433..41e96222c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java @@ -21,7 +21,7 @@ import java.util.Set; */ public class RecalDatumNode { private final static double SMALLEST_CHI2_PVALUE = 1e-300; - protected static Logger logger = Logger.getLogger(RecalDatumNode.class); + protected static final Logger logger = Logger.getLogger(RecalDatumNode.class); /** * fixedPenalty is this value if it's considered fixed diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java index df1ff2a0e..849a7ddee 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java @@ -13,7 +13,7 @@ import org.broadinstitute.sting.utils.NGSPlatform; */ public class GATKSAMReadGroupRecord extends SAMReadGroupRecord { - public static String LANE_TAG = "LN"; + public static final String LANE_TAG = "LN"; // the SAMReadGroupRecord data we're caching private String mSample = null; diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index c16470c48..bd908727f 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -53,8 +53,8 @@ public class ReadUtils { private ReadUtils() { } - private static int DEFAULT_ADAPTOR_SIZE = 100; - public static int CLIPPING_GOAL_NOT_REACHED = -1; + private static final int DEFAULT_ADAPTOR_SIZE = 100; + public static final int CLIPPING_GOAL_NOT_REACHED = -1; public static int getMeanRepresentativeReadCount(GATKSAMRecord read) { if (!read.isReducedRead()) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java index e3bef6bc5..c18f954a2 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java @@ -53,7 +53,7 @@ import java.util.*; */ @Invariant({"alleles != null"}) public final class GenotypeBuilder { - public static boolean MAKE_FAST_BY_DEFAULT = true; + public static final boolean MAKE_FAST_BY_DEFAULT = true; private String sampleName = null; private List alleles = Collections.emptyList(); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java index d644eda7d..7b4256b70 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java @@ -401,7 +401,7 @@ public class GenotypeLikelihoods { } // An index conversion from the deprecated PL ordering to the new VCF-based ordering for up to 3 alternate alleles - protected static int[] PLindexConversion = new int[]{0, 1, 3, 6, 2, 4, 7, 5, 8, 9}; + protected static final int[] PLindexConversion = new int[]{0, 1, 3, 6, 2, 4, 7, 5, 8, 9}; /** * get the allele index pair for the given PL using the deprecated PL ordering: diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 5421960b2..ff6b0be70 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -730,7 +730,7 @@ public class VariantContextUtils { vcList.remove(k); // avoid having empty lists if (vcList.size() == 0) - mappedVCs.remove(vcList); + mappedVCs.remove(type); if ( !mappedVCs.containsKey(vc.getType()) ) mappedVCs.put(vc.getType(), new ArrayList()); mappedVCs.get(vc.getType()).add(otherVC); From 47b4f7b7e581651f0a25f4c6bbc735012ba4038f Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 16 Aug 2012 14:59:05 -0400 Subject: [PATCH 148/176] One final FindBugs related fix. I think it's safe to consider these changes 'fixes' that are allowed to go in during a code freeze. --- .../sting/gatk/walkers/annotator/ChromosomeCounts.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java index 6bdd779c0..54837baad 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java @@ -53,8 +53,8 @@ import java.util.*; */ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation { - protected static final String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY }; - protected static final VCFInfoHeaderLine[] descriptions = { + public static final String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY }; + public static final VCFInfoHeaderLine[] descriptions = { VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY), VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY), VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY) }; From a22e7a5358ee9a2e8a8475b75867e1468fbd2bbc Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 16 Aug 2012 15:07:32 -0400 Subject: [PATCH 149/176] Should've run 'ant clean' instead of just 'ant'. In any event, these are 2 cases where we are setting a class's internal static variable directly. Very dangerous. --- public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java | 2 +- .../sting/utils/variantcontext/GenotypeBuilder.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java index f37451cba..439a0d8ed 100644 --- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java +++ b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java @@ -68,7 +68,7 @@ public class BAQ { } // Phred scaled now (changed 1/10/2011) - public static final double DEFAULT_GOP = 40; + public static double DEFAULT_GOP = 40; /* Takes a Phred Scale quality score and returns the error probability. * diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java index c18f954a2..e3bef6bc5 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java @@ -53,7 +53,7 @@ import java.util.*; */ @Invariant({"alleles != null"}) public final class GenotypeBuilder { - public static final boolean MAKE_FAST_BY_DEFAULT = true; + public static boolean MAKE_FAST_BY_DEFAULT = true; private String sampleName = null; private List alleles = Collections.emptyList(); From d8071c66ed41e3834d9aef4f7a193e12d15f7fa4 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 16 Aug 2012 15:22:10 -0400 Subject: [PATCH 150/176] Removing SlowGenotype object from GATK --- .../sting/gatk/GenomeAnalysisEngine.java | 4 - .../arguments/GATKArgumentCollection.java | 5 - .../utils/variantcontext/FastGenotype.java | 2 +- .../utils/variantcontext/GenotypeBuilder.java | 38 +--- .../utils/variantcontext/SlowGenotype.java | 193 ------------------ 5 files changed, 6 insertions(+), 236 deletions(-) delete mode 100755 public/java/src/org/broadinstitute/sting/utils/variantcontext/SlowGenotype.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 55107833d..e76cde43a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -233,10 +233,6 @@ public class GenomeAnalysisEngine { if (args.nonDeterministicRandomSeed) resetRandomGenerator(System.currentTimeMillis()); - // TODO -- REMOVE ME WHEN WE STOP BCF testing - if ( args.USE_SLOW_GENOTYPES ) - GenotypeBuilder.MAKE_FAST_BY_DEFAULT = false; - // if the use specified an input BQSR recalibration table then enable on the fly recalibration if (args.BQSR_RECAL_FILE != null) setBaseRecalibration(args.BQSR_RECAL_FILE, args.quantizationLevels, args.disableIndelQuals, args.PRESERVE_QSCORES_LESS_THAN, args.emitOriginalQuals); diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 06177868a..bbbd96cf1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -379,10 +379,5 @@ public class GATKArgumentCollection { @Hidden public boolean generateShadowBCF = false; // TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed - - @Argument(fullName="useSlowGenotypes",shortName = "useSlowGenotypes",doc="",required=false) - @Hidden - public boolean USE_SLOW_GENOTYPES = false; - // TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/FastGenotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/FastGenotype.java index d528bf0e4..4a7df9da4 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/FastGenotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/FastGenotype.java @@ -172,7 +172,7 @@ public final class FastGenotype extends Genotype { * @param values * @return */ - private final static boolean validADorPLField(final int[] values) { + private static boolean validADorPLField(final int[] values) { if ( values != null ) for ( int v : values ) if ( v < 0 ) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java index e3bef6bc5..0ee32fa2e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java @@ -53,8 +53,6 @@ import java.util.*; */ @Invariant({"alleles != null"}) public final class GenotypeBuilder { - public static boolean MAKE_FAST_BY_DEFAULT = true; - private String sampleName = null; private List alleles = Collections.emptyList(); @@ -67,8 +65,6 @@ public final class GenotypeBuilder { private String filters = null; private int initialAttributeMapSize = 5; - private boolean useFast = MAKE_FAST_BY_DEFAULT; - private final static Map NO_ATTRIBUTES = Collections.unmodifiableMap(new HashMap(0)); @@ -78,31 +74,22 @@ public final class GenotypeBuilder { // // ----------------------------------------------------------------- - public final static Genotype create(final String sampleName, final List alleles) { + public static Genotype create(final String sampleName, final List alleles) { return new GenotypeBuilder(sampleName, alleles).make(); } - public final static Genotype create(final String sampleName, + public static Genotype create(final String sampleName, final List alleles, final Map attributes) { return new GenotypeBuilder(sampleName, alleles).attributes(attributes).make(); } - protected final static Genotype create(final String sampleName, + protected static Genotype create(final String sampleName, final List alleles, final double[] gls) { return new GenotypeBuilder(sampleName, alleles).PL(gls).make(); } - public final static Genotype create(final String sampleName, - final List alleles, - final double log10Perror, - final Map attributes) { - return new GenotypeBuilder(sampleName, alleles) - .GQ(log10Perror == SlowGenotype.NO_LOG10_PERROR ? -1 : (int)(log10Perror * -10)) - .attributes(attributes).make(); - } - /** * Create a empty builder. Both a sampleName and alleles must be provided * before trying to make a Genotype from this builder. @@ -182,23 +169,8 @@ public final class GenotypeBuilder { */ @Ensures({"result != null"}) public Genotype make() { - if ( useFast ) { - final Map ea = extendedAttributes == null ? NO_ATTRIBUTES : extendedAttributes; - return new FastGenotype(sampleName, alleles, isPhased, GQ, DP, AD, PL, filters, ea); - } else { - final Map attributes = new LinkedHashMap(); - if ( extendedAttributes != null ) attributes.putAll(extendedAttributes); - final double log10PError = GQ == -1 ? SlowGenotype.NO_LOG10_PERROR : (GQ == 0 ? 0 : GQ / -10.0); - if ( DP != -1 ) attributes.put(VCFConstants.DEPTH_KEY, DP); - if ( AD != null ) attributes.put(VCFConstants.GENOTYPE_ALLELE_DEPTHS, AD); - final double[] log10likelihoods = PL != null ? GenotypeLikelihoods.fromPLs(PL).getAsVector() : null; - return new SlowGenotype(sampleName, alleles, log10PError, filters, attributes, isPhased, log10likelihoods); - } - } - - public GenotypeBuilder useFast(boolean useFast) { - this.useFast = useFast; - return this; + final Map ea = extendedAttributes == null ? NO_ATTRIBUTES : extendedAttributes; + return new FastGenotype(sampleName, alleles, isPhased, GQ, DP, AD, PL, filters, ea); } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/SlowGenotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/SlowGenotype.java deleted file mode 100755 index c3f027484..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/SlowGenotype.java +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.variantcontext; - - -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - -import java.util.*; - -/** - * This class encompasses all the basic information about a genotype. It is immutable. - * - * @author Mark DePristo - */ -@Deprecated -public class SlowGenotype extends Genotype { - protected CommonInfo commonInfo; - public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR; - protected List alleles = null; - protected boolean isPhased = false; - - protected SlowGenotype(final String sampleName, - final List alleles, - final double log10PError, - final String filters, - final Map attributes, - final boolean isPhased, - final double[] log10Likelihoods) { - super(sampleName, filters); - - if ( alleles == null || alleles.isEmpty() ) - this.alleles = Collections.emptyList(); - else - this.alleles = Collections.unmodifiableList(alleles); - commonInfo = new CommonInfo(sampleName, log10PError, Collections.emptySet(), attributes); - if ( log10Likelihoods != null ) - commonInfo.putAttribute(VCFConstants.GENOTYPE_PL_KEY, GenotypeLikelihoods.fromLog10Likelihoods(log10Likelihoods)); - this.isPhased = isPhased; - validate(); - } - - @Override public List getAlleles() { - return alleles; - } - - @Override public Allele getAllele(int i) { - if ( getType() == GenotypeType.UNAVAILABLE ) - throw new ReviewedStingException("Requesting alleles for an UNAVAILABLE genotype"); - return alleles.get(i); - } - - @Override public boolean isPhased() { return isPhased; } - - // - // Useful methods for getting genotype likelihoods for a genotype object, if present - // - @Override public boolean hasLikelihoods() { - return (commonInfo.hasAttribute(VCFConstants.GENOTYPE_PL_KEY) && !commonInfo.getAttribute(VCFConstants.GENOTYPE_PL_KEY).equals(VCFConstants.MISSING_VALUE_v4)) || - (commonInfo.hasAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) && !commonInfo.getAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY).equals(VCFConstants.MISSING_VALUE_v4)); - } - - @Override public GenotypeLikelihoods getLikelihoods() { - GenotypeLikelihoods x = getLikelihoods(VCFConstants.GENOTYPE_PL_KEY, true); - if ( x != null ) - return x; - else { - x = getLikelihoods(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, false); - return x; - } - } - - private GenotypeLikelihoods getLikelihoods(String key, boolean asPL) { - Object x = commonInfo.getAttribute(key); - if ( x instanceof String ) { - if ( asPL ) - return GenotypeLikelihoods.fromPLField((String)x); - else - return GenotypeLikelihoods.fromGLField((String)x); - } - else if ( x instanceof GenotypeLikelihoods ) return (GenotypeLikelihoods)x; - else return null; - } - - private final void validate() { - if ( alleles.size() == 0) return; - - for ( Allele allele : alleles ) { - if ( allele == null ) - throw new IllegalArgumentException("BUG: allele cannot be null in Genotype"); - } - } - - // --------------------------------------------------------------------------------------------------------- - // - // get routines to access context info fields - // - // --------------------------------------------------------------------------------------------------------- - @Override public boolean hasLog10PError() { return commonInfo.hasLog10PError(); } - @Override public double getLog10PError() { return commonInfo.getLog10PError(); } - - @Override - public boolean hasExtendedAttribute(String key) { return commonInfo.hasAttribute(key); } - - @Override - public Object getExtendedAttribute(String key) { return commonInfo.getAttribute(key); } - - @Override - public Object getExtendedAttribute(String key, Object defaultValue) { - return commonInfo.getAttribute(key, defaultValue); - } - -// public String getAttributeAsString(String key, String defaultValue) { return commonInfo.getAttributeAsString(key, defaultValue); } -// public int getAttributeAsInt(String key, int defaultValue) { return commonInfo.getAttributeAsInt(key, defaultValue); } -// public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); } -// public boolean getAttributeAsBoolean(String key, boolean defaultValue) { return commonInfo.getAttributeAsBoolean(key, defaultValue); } - - @Override - public int[] getPL() { - return hasPL() ? getLikelihoods().getAsPLs() : null; - } - - @Override - public boolean hasPL() { - return hasLikelihoods(); - } - - @Override - public int getDP() { - return commonInfo.getAttributeAsInt(VCFConstants.DEPTH_KEY, -1); - } - - @Override - public boolean hasDP() { - return commonInfo.hasAttribute(VCFConstants.DEPTH_KEY); - } - - @Override - public int[] getAD() { - if ( hasAD() ) { - return (int[])commonInfo.getAttribute(VCFConstants.GENOTYPE_ALLELE_DEPTHS); - } else - return null; - } - - @Override - public boolean hasAD() { - return commonInfo.hasAttribute(VCFConstants.GENOTYPE_ALLELE_DEPTHS); - } - - @Override - public int getGQ() { - if ( commonInfo.hasLog10PError() ) - return (int)Math.round(commonInfo.getPhredScaledQual()); - else - return -1; - } - - @Override - public boolean hasGQ() { - return hasLog10PError(); - } - - @Override - public Map getExtendedAttributes() { - final Map ea = new LinkedHashMap(commonInfo.getAttributes()); - for ( final String primary : FastGenotype.PRIMARY_KEYS ) - ea.remove(primary); - return ea; - } -} \ No newline at end of file From 05cbf1c8c09b00329d639faeb2870613f1cea12f Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 16 Aug 2012 15:40:52 -0400 Subject: [PATCH 151/176] FindBugs 'Efficiency' fixes --- .../gatk/walkers/genotyper/ErrorModel.java | 9 ++-- .../walkers/beagle/ProduceBeagleInput.java | 2 - .../phasing/PreciseNonNegativeDouble.java | 2 +- .../walkers/phasing/ReadBackedPhasing.java | 2 +- .../sting/utils/SWPairwiseAlignment.java | 53 ------------------- 5 files changed, 7 insertions(+), 61 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java index 8e4ca9595..26ff4db24 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java @@ -262,18 +262,19 @@ public class ErrorModel { } public String toString() { - String result = "("; + StringBuilder result = new StringBuilder("("); boolean skipComma = true; for (double v : probabilityVector.getProbabilityVector()) { if (skipComma) { skipComma = false; } else { - result += ","; + result.append(","); } - result += String.format("%.4f", v); + result.append(String.format("%.4f", v)); } - return result + ")"; + result.append(")"); + return result.toString(); } public static int getTotalReferenceDepth(HashMap perLaneErrorModels) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java index fdc333676..d11747766 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java @@ -351,7 +351,6 @@ public class ProduceBeagleInput extends RodWalker { } public static class CachingFormatter { - private int maxCacheSize = 0; private String format; private LRUCache cache; @@ -379,7 +378,6 @@ public class ProduceBeagleInput extends RodWalker { } public CachingFormatter(String format, int maxCacheSize) { - this.maxCacheSize = maxCacheSize; this.format = format; this.cache = new LRUCache(maxCacheSize); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PreciseNonNegativeDouble.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PreciseNonNegativeDouble.java index b68739b48..d3f4f6266 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PreciseNonNegativeDouble.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PreciseNonNegativeDouble.java @@ -86,7 +86,7 @@ class PreciseNonNegativeDouble implements Comparable { if (Math.abs(logValDiff) <= EQUALS_THRESH) return 0; // this.equals(other) - return new Double(Math.signum(logValDiff)).intValue(); + return (int)Math.signum(logValDiff); } public boolean equals(PreciseNonNegativeDouble other) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java index f49e8f8c0..d8ae6b28b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java @@ -870,7 +870,7 @@ public class ReadBackedPhasing extends RodWalker 0 ) System.out.print(a.charAt(i-1)); - else System.out.print(' '); - System.out.print(" "); - for ( int j = 0; j < s[i].length ; j++ ) { - System.out.printf(" %4d",s[i][j]); - } - System.out.println(); - } - } - - - private void print(double[][] s, String a, String b) { - - System.out.print(""); - for ( int j = 1 ; j < s[0].length ; j++) System.out.printf(" %4c",b.charAt(j-1)) ; - System.out.println(); - - for ( int i = 0 ; i < s.length ; i++) { - if ( i > 0 ) System.out.print(a.charAt(i-1)); - else System.out.print(' '); - System.out.print(" "); - for ( int j = 0; j < s[i].length ; j++ ) { - System.out.printf(" %2.1f",s[i][j]); - } - System.out.println(); - } - } - private void print(double[] s, byte[] a, byte[] b) { int n = a.length+1; int m = b.length+1; From 3253fc216b8eb77859911ba0fb73b27509aa89c1 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 16 Aug 2012 15:53:06 -0400 Subject: [PATCH 152/176] FindBugs 'Maintainability' fixes --- .../net/sf/picard/reference/FastaSequenceIndexBuilder.java | 1 + .../gatk/walkers/diagnostics/targets/SampleStatistics.java | 2 +- .../sting/gatk/walkers/diffengine/BAMDiffableReader.java | 4 +++- .../gatk/walkers/diffengine/GATKReportDiffableReader.java | 6 ++++-- .../gatk/walkers/indels/ConstrainedMateFixingManager.java | 2 +- .../gatk/walkers/variantrecalibration/TrancheManager.java | 1 + .../gatk/walkers/variantutils/VariantsToBinaryPed.java | 1 + .../org/broadinstitute/sting/utils/help/ForumAPIUtils.java | 1 + 8 files changed, 13 insertions(+), 5 deletions(-) diff --git a/public/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java b/public/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java index 6c8fe1834..10326ef2e 100644 --- a/public/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java +++ b/public/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java @@ -208,6 +208,7 @@ public class FastaSequenceIndexBuilder { break; } } + in.close(); return sequenceIndex; } catch (IOException e) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatistics.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatistics.java index 0fc2d8929..9f6258eee 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatistics.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatistics.java @@ -264,7 +264,7 @@ class SampleStatistics { return false; // different contigs - if (read.getMateReferenceIndex() != read.getReferenceIndex()) + if (!read.getMateReferenceIndex().equals(read.getReferenceIndex())) return false; // unmapped diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java index 2d372ca9f..0d4db5560 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java @@ -104,7 +104,9 @@ public class BAMDiffableReader implements DiffableReader { InputStream fstream = new BufferedInputStream(new FileInputStream(file)); if ( !BlockCompressedInputStream.isValidFile(fstream) ) return false; - new BlockCompressedInputStream(fstream).read(buffer, 0, BAM_MAGIC.length); + final BlockCompressedInputStream BCIS = new BlockCompressedInputStream(fstream); + BCIS.read(buffer, 0, BAM_MAGIC.length); + BCIS.close(); return Arrays.equals(buffer, BAM_MAGIC); } catch ( IOException e ) { return false; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java index 480a1fc29..5e4ea5f81 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java @@ -90,8 +90,10 @@ public class GATKReportDiffableReader implements DiffableReader { public boolean canRead(File file) { try { final String HEADER = GATKReport.GATKREPORT_HEADER_PREFIX; - char[] buff = new char[HEADER.length()]; - new FileReader(file).read(buff, 0, HEADER.length()); + final char[] buff = new char[HEADER.length()]; + final FileReader FR = new FileReader(file); + FR.read(buff, 0, HEADER.length()); + FR.close(); String firstLine = new String(buff); return firstLine.startsWith(HEADER); } catch (IOException e) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java index 4feba35af..68365adf7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java @@ -305,7 +305,7 @@ public class ConstrainedMateFixingManager { } public static boolean iSizeTooBigToMove(SAMRecord read, int maxInsertSizeForMovingReadPairs) { - return ( read.getReadPairedFlag() && ! read.getMateUnmappedFlag() && read.getReferenceName() != read.getMateReferenceName() ) // maps to different chromosomes + return ( read.getReadPairedFlag() && ! read.getMateUnmappedFlag() && !read.getReferenceName().equals(read.getMateReferenceName()) ) // maps to different chromosomes || Math.abs(read.getInferredInsertSize()) > maxInsertSizeForMovingReadPairs; // we won't try to move such a read } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrancheManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrancheManager.java index d45739528..af0778399 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrancheManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrancheManager.java @@ -177,6 +177,7 @@ public class TrancheManager { double runningValue = metric.getRunningMetric(i); out.printf("%.4f %d %.4f%n", d.lod, score, runningValue); } + out.close(); } catch (FileNotFoundException e) { throw new UserException.CouldNotCreateOutputFile(f, e); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java index 14c811b03..3fba8fa77 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java @@ -270,6 +270,7 @@ public class VariantsToBinaryPed extends RodWalker { inStream.read(readGenotypes); outBed.write(readGenotypes); } + inStream.close(); } catch (IOException e) { throw new ReviewedStingException("Error reading form temp file for input.",e); } diff --git a/public/java/src/org/broadinstitute/sting/utils/help/ForumAPIUtils.java b/public/java/src/org/broadinstitute/sting/utils/help/ForumAPIUtils.java index 388e7ce45..1dfc4ecc0 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/ForumAPIUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/ForumAPIUtils.java @@ -135,6 +135,7 @@ public class ForumAPIUtils { System.out.println(line); } + br.close(); httpClient.getConnectionManager().shutdown(); return output; From 6a2862e8bcfca67ed4c1169d1aac0ffab6dfdc86 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 16 Aug 2012 16:23:45 -0400 Subject: [PATCH 153/176] GSA-483: Bug in GATKdocs for Enums -- Fixed to no long show constants in enums as constant values in the gatkdocs --- .../help/GenericDocumentationHandler.java | 39 ++++++++++++++----- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java index dc0668cea..ab5181b45 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java @@ -584,20 +584,39 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { * @return */ @Requires("enumClass.isEnum()") - private List> docForEnumArgument(Class enumClass) { - ClassDoc doc = this.getDoclet().getClassDocForClass(enumClass); - if (doc == null) // || ! doc.isEnum() ) + private List> docForEnumArgument(final Class enumClass) { + final ClassDoc doc = this.getDoclet().getClassDocForClass(enumClass); + if ( doc == null ) throw new RuntimeException("Tried to get docs for enum " + enumClass + " but got null instead"); - List> bindings = new ArrayList>(); - for (final FieldDoc field : doc.fields(false)) { - bindings.add( - new HashMap() {{ - put("name", field.name()); - put("summary", field.commentText()); - }}); + final Set enumConstantFieldNames = enumConstantsNames(enumClass); + + final List> bindings = new ArrayList>(); + for (final FieldDoc fieldDoc : doc.fields(false)) { + if (enumConstantFieldNames.contains(fieldDoc.name()) ) + bindings.add( + new HashMap() {{ + put("name", fieldDoc.name()); + put("summary", fieldDoc.commentText()); + }}); } return bindings; } + + /** + * Returns the name of the fields that are enum constants according to reflection + * + * @return a non-null set of fields that are enum constants + */ + private Set enumConstantsNames(final Class enumClass) { + final Set enumConstantFieldNames = new HashSet(); + + for ( final Field field : enumClass.getFields() ) { + if ( field.isEnumConstant() ) + enumConstantFieldNames.add(field.getName()); + } + + return enumConstantFieldNames; + } } From 65c594afff5825e9f9386f21daecb61955632d3b Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 16 Aug 2012 21:27:07 -0400 Subject: [PATCH 154/176] Better error message for reads that begin/end with a deletion in LIBS --- .../sting/gatk/iterators/LocusIteratorByState.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index 1606c227d..f97069189 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -159,7 +159,7 @@ public class LocusIteratorByState extends LocusIterator { return stepForwardOnGenome(); } else { if (curElement != null && curElement.getOperator() == CigarOperator.D) - throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". This is an indication of a malformed file, but the SAM spec allows reads ending in deletion. If you are sure you want to use this read, re-run your analysis with the extra option: -rf BadCigar"); + throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar"); // Reads that contain indels model the genomeOffset as the following base in the reference. Because // we fall into this else block only when indels end the read, increment genomeOffset such that the @@ -185,7 +185,7 @@ public class LocusIteratorByState extends LocusIterator { break; case D: // deletion w.r.t. the reference if (readOffset < 0) // we don't want reads starting with deletion, this is a malformed cigar string - throw new UserException.MalformedBAM(read, "Read starting with deletion. Cigar: " + read.getCigarString() + ". This is an indication of a malformed file, but the SAM spec allows reads starting in deletion. If you are sure you want to use this read, re-run your analysis with the extra option: -rf BadCigar"); + throw new UserException.MalformedBAM(read, "read starts with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar"); // should be the same as N case genomeOffset++; done = true; From 53383e82ecd9e3de2d4506daee6dc4fae8e81773 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 16 Aug 2012 21:41:18 -0400 Subject: [PATCH 155/176] Hmm, not good. Fixing the math in PBT resulted in changed MD5s for integration tests that look like significant changes. I am reverting and will report this to Laurent. --- .../sting/gatk/walkers/phasing/PhaseByTransmission.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java index 0dcafb30a..bbd4bf92f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java @@ -645,7 +645,7 @@ public class PhaseByTransmission extends RodWalker, HashMa bestChildGenotype.clear(); bestChildGenotype.add(childGenotype.getKey()); } - else if(MathUtils.compareDoubles(configurationLikelihood, bestConfigurationLikelihood) == 0) { + else if(configurationLikelihood == bestConfigurationLikelihood) { bestFirstParentGenotype.add(firstParentGenotype.getKey()); bestSecondParentGenotype.add(secondParentGenotype.getKey()); bestChildGenotype.add(childGenotype.getKey()); From 67ebd65512d9dfc63bb85378c2833cc501ac4182 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 17 Aug 2012 10:13:40 -0400 Subject: [PATCH 157/176] Bugfix for potential SEGFAULT with JNA getting execution hosts for LSF with multiple hosts --- .../sting/queue/engine/lsf/Lsf706JobRunner.scala | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala index de996d187..2fbea1497 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala @@ -35,7 +35,7 @@ import org.broadinstitute.sting.queue.engine.{RunnerStatus, CommandLineJobRunner import java.util.regex.Pattern import java.lang.StringBuffer import java.util.Date -import com.sun.jna.{Structure, StringArray, NativeLong} +import com.sun.jna.{Pointer, Structure, StringArray, NativeLong} import com.sun.jna.ptr.IntByReference /** @@ -295,9 +295,17 @@ object Lsf706JobRunner extends Logging { // the platform LSF startTimes are in seconds, not milliseconds, so convert to the java convention runner.getRunInfo.startTime = new Date(jobInfo.startTime.longValue * 1000) runner.getRunInfo.doneTime = new Date(jobInfo.endTime.longValue * 1000) - val exHostsRaw = jobInfo.exHosts.getStringArray(0) - //logger.warn("exHostsRaw = " + exHostsRaw) - val exHostsList = exHostsRaw.toSeq + + val exHostsList = + if (jobInfo.numExHosts != 1) { + // this is necessary because + val exHostsString = "multipleHosts_" + jobInfo.numExHosts + logger.debug("numExHosts = " + jobInfo.numExHosts + " != 1 for job " + runner.jobId + ", cannot safely get exhosts, setting to " + exHostsString) + List(exHostsString) + } else { + jobInfo.exHosts.getStringArray(0).toSeq + } + //logger.warn("exHostsList = " + exHostsList) val exHosts = exHostsList.reduceLeft(_ + "," + _) //logger.warn("exHosts = " + exHosts) From de3be4580652624d28f85ddd53719052ed9b07d0 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 17 Aug 2012 10:14:19 -0400 Subject: [PATCH 158/176] Proper function call in BCF2Decoder to validateReadBytes --- .../sting/utils/codecs/bcf2/BCF2Decoder.java | 48 ++++++++++++++----- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java index d7f59632c..05ba2aa1f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java @@ -82,7 +82,7 @@ public final class BCF2Decoder { public void skipNextBlock(final int blockSizeInBytes, final InputStream stream) { try { final int bytesRead = (int)stream.skip(blockSizeInBytes); - validateReadBytes(bytesRead, blockSizeInBytes); + validateReadBytes(bytesRead, 1, blockSizeInBytes); } catch ( IOException e ) { throw new UserException.CouldNotReadInputFile("I/O error while reading BCF2 file", e); } @@ -316,17 +316,37 @@ public final class BCF2Decoder { } /** + * Read all bytes for a BCF record block into a byte[], and return it * - * @param inputStream - * @return + * Is smart about reading from the stream multiple times to fill the buffer, if necessary + * + * @param blockSizeInBytes number of bytes to read + * @param inputStream the stream to read from + * @return a non-null byte[] containing exactly blockSizeInBytes bytes from the inputStream */ - private final static byte[] readRecordBytes(final int blockSizeInBytes, final InputStream inputStream) { + @Requires({"blockSizeInBytes >= 0", "inputStream != null"}) + @Ensures("result != null") + private static byte[] readRecordBytes(final int blockSizeInBytes, final InputStream inputStream) { assert blockSizeInBytes >= 0; final byte[] record = new byte[blockSizeInBytes]; try { - final int bytesRead = inputStream.read(record); - validateReadBytes(bytesRead, blockSizeInBytes); + int bytesRead = 0; + int nReadAttempts = 0; // keep track of how many times we've read + + // because we might not read enough bytes from the file in a single go, do it in a loop until we get EOF + while ( bytesRead < blockSizeInBytes ) { + final int read1 = inputStream.read(record, bytesRead, blockSizeInBytes - bytesRead); + if ( read1 == -1 ) + validateReadBytes(bytesRead, nReadAttempts, blockSizeInBytes); + else + bytesRead += read1; + } + + if ( nReadAttempts > 1 ) // TODO -- remove me + logger.warn("Required multiple read attempts to actually get the entire BCF2 block, unexpected behavior"); + + validateReadBytes(bytesRead, nReadAttempts, blockSizeInBytes); } catch ( IOException e ) { throw new UserException.CouldNotReadInputFile("I/O error while reading BCF2 file", e); } @@ -334,14 +354,20 @@ public final class BCF2Decoder { return record; } - private final static void validateReadBytes(final int actuallyRead, final int expected) { + /** + * Make sure we read the right number of bytes, or throw an error + * + * @param actuallyRead + * @param nReadAttempts + * @param expected + */ + private static void validateReadBytes(final int actuallyRead, final int nReadAttempts, final int expected) { assert expected >= 0; if ( actuallyRead < expected ) { - throw new UserException.MalformedBCF2(String.format("Failed to read next complete record: %s", - actuallyRead == -1 ? - "premature end of input stream" : - String.format("expected %d bytes but read only %d", expected, actuallyRead))); + throw new UserException.MalformedBCF2( + String.format("Failed to read next complete record: expected %d bytes but read only %d after %d iterations", + expected, actuallyRead, nReadAttempts)); } } From 4c0f198d485a62544fe7b9115b080a9a5318ab11 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 17 Aug 2012 10:17:12 -0400 Subject: [PATCH 159/176] Potential fix for GSA-484: Incomplete writing of temp BCF when running CombineVariants in parallel -- Keep reading from BCF2 input stream when read(byte[]) returns < number of needed bytes -- It's possible (I think) that the failure in GSA-484 is due to multi-threading writing/reading of BCF2 records where the underlying stream is not yet flushed so read(byte[]) returns a partial result. No loops until we get all of the needed bytes or EOF is encounted --- .../org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java | 1 + 1 file changed, 1 insertion(+) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java index fc0b3c4a9..60fcb6585 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java @@ -118,6 +118,7 @@ public final class BCF2Codec implements FeatureCodec { final int sitesBlockSize = decoder.readBlockSize(inputStream); final int genotypeBlockSize = decoder.readBlockSize(inputStream); + decoder.readNextBlock(sitesBlockSize, inputStream); decodeSiteLoc(builder); final SitesInfoForDecoding info = decodeSitesExtendedInfo(builder); From a3d2764d11ce75cbc5258054eb7c547e2c0981a2 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 17 Aug 2012 10:35:25 -0400 Subject: [PATCH 160/176] Fixed: GSA-392 @arguments with just a short name get the wrong argument bindings -- Now blows up if an argument begins with -. Implementation isn't pretty, as it actually blows up during Queue extension creation with a somewhat obscure error message but at least its something. --- .../sting/commandline/ArgumentDefinition.java | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinition.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinition.java index a5647ec0f..618120217 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinition.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinition.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.commandline; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.lang.annotation.Annotation; import java.util.List; @@ -147,6 +148,9 @@ public class ArgumentDefinition { this.exclusiveOf = exclusiveOf; this.validation = validation; this.validOptions = validOptions; + + validateName(shortName); + validateName(fullName); } /** @@ -192,6 +196,9 @@ public class ArgumentDefinition { else shortName = null; + validateName(shortName); + validateName(fullName); + this.ioType = ioType; this.argumentType = argumentType; this.fullName = fullName; @@ -277,4 +284,14 @@ public class ArgumentDefinition { String validation = (String)CommandLineUtils.getValue(annotation, "validation"); return validation.trim().length() > 0 ? validation.trim() : null; } + + /** + * Make sure the argument's name is valid + * + * @param name + */ + private void validateName(final String name) { + if ( name != null && name.startsWith("-") ) + throw new ReviewedStingException("Invalid argument definition: " + name + " begins with a -"); + } } From be0f8beebbb2cc5acd265c65a03f06a09794c396 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 17 Aug 2012 11:21:49 -0400 Subject: [PATCH 161/176] Fixed GSA-434: GATK should generate error when gzipped FASTA is passed in. -- The GATK sort of handles this now, but only if you have the exactly correct sequence dictionary and FAI files associated with the reference. If you do, the file can be .gz. If not, the GATK will fail on creating the FAI and DICT files. Added an error message that handles this case and clearly says what to do. --- .../reference/ReferenceDataSource.java | 31 ++++++++++--------- .../sting/utils/exceptions/UserException.java | 11 +++++++ 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java index b131e36c1..c02ae7d99 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java @@ -62,23 +62,24 @@ public class ReferenceDataSource { * @param fastaFile Fasta file to be used as reference */ public ReferenceDataSource(File fastaFile) { - // does the fasta file exist? check that first... if (!fastaFile.exists()) throw new UserException("The fasta file you specified (" + fastaFile.getAbsolutePath() + ") does not exist."); - File indexFile = new File(fastaFile.getAbsolutePath() + ".fai"); - File dictFile; - if (fastaFile.getAbsolutePath().endsWith("fa")) { - dictFile = new File(fastaFile.getAbsolutePath().replace(".fa", ".dict")); - } - else - dictFile = new File(fastaFile.getAbsolutePath().replace(".fasta", ".dict")); + final boolean isGzipped = fastaFile.getAbsolutePath().endsWith(".gz"); + + final File indexFile = new File(fastaFile.getAbsolutePath() + ".fai"); + + // determine the name for the dict file + final String fastaExt = (fastaFile.getAbsolutePath().endsWith("fa") ? ".fa" : ".fasta" ) + (isGzipped ? ".gz" : ""); + final File dictFile = new File(fastaFile.getAbsolutePath().replace(fastaExt, ".dict")); /* - if index file does not exist, create it manually - */ + * if index file does not exist, create it manually + */ if (!indexFile.exists()) { + if ( isGzipped ) throw new UserException.CouldNotCreateReferenceFAIorDictForGzippedRef(fastaFile); + logger.info(String.format("Index file %s does not exist. Trying to create it now.", indexFile.getAbsolutePath())); FSLockWithShared indexLock = new FSLockWithShared(indexFile,true); try { @@ -95,7 +96,7 @@ public class ReferenceDataSource { } catch(UserException e) { // Rethrow all user exceptions as-is; there should be more details in the UserException itself. - throw e; + throw e; } catch (Exception e) { // If lock creation succeeded, the failure must have been generating the index. @@ -114,6 +115,8 @@ public class ReferenceDataSource { * This has been filed in trac as (PIC-370) Want programmatic interface to CreateSequenceDictionary */ if (!dictFile.exists()) { + if ( isGzipped ) throw new UserException.CouldNotCreateReferenceFAIorDictForGzippedRef(fastaFile); + logger.info(String.format("Dict file %s does not exist. Trying to create it now.", dictFile.getAbsolutePath())); /* @@ -218,9 +221,9 @@ public class ReferenceDataSource { for(int shardStart = 1; shardStart <= refSequenceRecord.getSequenceLength(); shardStart += maxShardSize) { final int shardStop = Math.min(shardStart+maxShardSize-1, refSequenceRecord.getSequenceLength()); shards.add(new LocusShard(parser, - readsDataSource, - Collections.singletonList(parser.createGenomeLoc(refSequenceRecord.getSequenceName(),shardStart,shardStop)), - null)); + readsDataSource, + Collections.singletonList(parser.createGenomeLoc(refSequenceRecord.getSequenceName(),shardStart,shardStop)), + null)); } } return shards; diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java index bda03f675..3130469e5 100755 --- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java +++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java @@ -340,6 +340,17 @@ public class UserException extends ReviewedStingException { } } + public static class CouldNotCreateReferenceFAIorDictForGzippedRef extends UserException { + public CouldNotCreateReferenceFAIorDictForGzippedRef(final File f) { + super("Although the GATK can process .gz reference sequences, it currently cannot create FAI " + + "or DICT files for them. In order to use the GATK with reference.fasta.gz you will need to " + + "create .dict and .fai files for reference.fasta.gz and name them reference.fasta.gz.fai and " + + "reference.dict. Potentially the easiest way to do this is to uncompress reference.fasta, " + + "run the GATK to create the .dict and .fai files, and copy them to the appropriate location. " + + "Sorry for the inconvenience."); + } + } + public static class CouldNotCreateReferenceIndexFileBecauseOfLock extends UserException.CouldNotCreateReferenceIndexFile { public CouldNotCreateReferenceIndexFileBecauseOfLock(File f) { super(f, "could not be written because an exclusive file lock could not be obtained. " + From daa26cc64e1ae26bc2a38fded3b65f4bf589411b Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 17 Aug 2012 11:41:07 -0400 Subject: [PATCH 162/176] Print to logger not to System.out in CachingIndexFastaSequenceFile when profiling cache performance --- .../sting/utils/fasta/CachingIndexedFastaSequenceFile.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java b/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java index 44b586bcd..48706543a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java +++ b/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java @@ -41,6 +41,8 @@ import java.util.Arrays; * Thread-safe! Uses a lock object to protect write and access to the cache. */ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile { + protected static final org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(CachingIndexedFastaSequenceFile.class); + /** global enable flag */ private static final boolean USE_CACHE = true; @@ -125,7 +127,7 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile { public void printEfficiency() { // comment out to disable tracking if ( (cacheHits + cacheMisses) % PRINT_FREQUENCY == 0 ) { - System.out.printf("### CachingIndexedFastaReader: hits=%d misses=%d efficiency %.6f%%%n", cacheHits, cacheMisses, calcEfficiency()); + logger.info(String.format("### CachingIndexedFastaReader: hits=%d misses=%d efficiency %.6f%%%n", cacheHits, cacheMisses, calcEfficiency())); } } From bf6c0aaa57552f920e8c76f353a5a610f94104c3 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 17 Aug 2012 11:48:05 -0400 Subject: [PATCH 163/176] Fix for missing formatter in R 2.15 -- VariantCallQC now works on newest ESP call set --- .../broadinstitute/sting/utils/R/gsalib/R/gsa.variantqc.utils.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.variantqc.utils.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.variantqc.utils.R index 19567e7e6..45dacd835 100644 --- a/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.variantqc.utils.R +++ b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.variantqc.utils.R @@ -207,7 +207,7 @@ plotVariantQC <- function(metrics, measures, requestedStrat = "Sample", if ( requestedStrat == "Sample" ) { perSampleGraph <- perSampleGraph + geom_text(aes(label=strat), size=1.5) + geom_blank() # don't display a scale - perSampleGraph <- perSampleGraph + scale_x_discrete("Sample (ordered by nSNPs)", formatter=function(x) "") + perSampleGraph <- perSampleGraph + scale_x_discrete("Sample (ordered by nSNPs)") } else { # by AlleleCount perSampleGraph <- perSampleGraph + geom_point(aes(size=log10(nobs))) #+ geom_smooth(aes(weight=log10(nobs))) perSampleGraph <- perSampleGraph + scale_x_log10("AlleleCount") From 2676b7fc2e7d8ced029830bc49f2d6bcdedbe6da Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 17 Aug 2012 11:49:53 -0400 Subject: [PATCH 165/176] Put in a sanity check that MLEAC <= AN --- .../walkers/genotyper/UnifiedGenotyperEngine.java | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 05e12b43d..67ade390f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -38,6 +38,7 @@ import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -442,10 +443,17 @@ public class UnifiedGenotyperEngine { // add the MLE AC and AF annotations if ( alleleCountsofMLE.size() > 0 ) { attributes.put(VCFConstants.MLE_ALLELE_COUNT_KEY, alleleCountsofMLE); - final double AN = (double)builder.make().getCalledChrCount(); + final int AN = builder.make().getCalledChrCount(); + + // let's sanity check that we don't have an invalid MLE value in there + for ( int MLEAC : alleleCountsofMLE ) { + if ( MLEAC > AN ) + throw new ReviewedStingException(String.format("MLEAC value (%d) is larger than AN (%d) at position %s:%d", MLEAC, AN, loc.getContig(), loc.getStart())); + } + final ArrayList MLEfrequencies = new ArrayList(alleleCountsofMLE.size()); for ( int AC : alleleCountsofMLE ) - MLEfrequencies.add((double)AC / AN); + MLEfrequencies.add((double)AC / (double)AN); attributes.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, MLEfrequencies); } From 980685af16160fee6677f86021e090d71b162774 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 17 Aug 2012 14:55:17 -0400 Subject: [PATCH 166/176] Fix GSA-137: Having both DataSource.REFERENCE and DataSource.REFERENCE_BASES is confusing to end users. -- Removed REFERENCE_BASES option. You only have REFERENCE now. There's no efficiency savings for the REFERENCE_BASES option any longer, since the reference bases are loaded lazy so if you don't use them there's effectively no cost to making the RefContext that could load them. --- .../sting/gatk/traversals/TraverseReads.java | 4 +--- .../sting/gatk/walkers/ActiveRegionWalker.java | 2 +- .../sting/gatk/walkers/DataSource.java | 12 +++++++++++- .../sting/gatk/walkers/LocusWalker.java | 2 +- .../sting/gatk/walkers/ReadWalker.java | 2 +- .../broadinstitute/sting/gatk/walkers/RefWalker.java | 2 +- .../sting/gatk/walkers/bqsr/BaseRecalibrator.java | 2 +- 7 files changed, 17 insertions(+), 9 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java index cb094d29b..d29e9a5f2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java @@ -75,8 +75,6 @@ public class TraverseReads extends TraversalEngine,Read if( !dataProvider.hasReads() ) throw new IllegalArgumentException("Unable to traverse reads; no read data is available."); - boolean needsReferenceBasesP = WalkerManager.isRequired(walker, DataSource.REFERENCE_BASES); - ReadView reads = new ReadView(dataProvider); ReadReferenceView reference = new ReadReferenceView(dataProvider); @@ -91,7 +89,7 @@ public class TraverseReads extends TraversalEngine,Read ReferenceContext refContext = null; // get the array of characters for the reference sequence, since we're a mapped read - if (needsReferenceBasesP && !read.getReadUnmappedFlag() && dataProvider.hasReference()) + if (!read.getReadUnmappedFlag() && dataProvider.hasReference()) refContext = reference.getReferenceContext(read); // update the number of reads we've seen diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java index aba508b3e..cbe791353 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java @@ -28,7 +28,7 @@ import java.util.List; */ @By(DataSource.READS) -@Requires({DataSource.READS, DataSource.REFERENCE_BASES}) +@Requires({DataSource.READS, DataSource.REFERENCE}) @PartitionBy(PartitionType.READ) @ActiveRegionExtension(extension=50,maxRegion=1500) @ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class}) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/DataSource.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/DataSource.java index a152ab137..1f93c67a6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/DataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/DataSource.java @@ -16,8 +16,18 @@ package org.broadinstitute.sting.gatk.walkers; * Allow user to choose between a number of different data sources. */ public enum DataSource { + /** + * Does this walker require read (BAM) data to work? + */ READS, + + /** + * Does this walker require reference data to work? + */ REFERENCE, - REFERENCE_BASES, // Do I actually need the reference bases passed to the walker? + + /** + * Does this walker require reference order data (VCF) to work? + */ REFERENCE_ORDERED_DATA } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java index 2a92d8831..3b18dda44 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java @@ -16,7 +16,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; * To change this template use File | Settings | File Templates. */ @By(DataSource.READS) -@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES}) +@Requires({DataSource.READS,DataSource.REFERENCE}) @PartitionBy(PartitionType.LOCUS) @ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class}) @RemoveProgramRecords diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java index 8933bd73e..77e3af93f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java @@ -12,7 +12,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; * Time: 2:52:28 PM * To change this template use File | Settings | File Templates. */ -@Requires({DataSource.READS, DataSource.REFERENCE_BASES}) +@Requires({DataSource.READS, DataSource.REFERENCE}) @PartitionBy(PartitionType.READ) public abstract class ReadWalker extends Walker { public boolean requiresOrderedReads() { return false; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/RefWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/RefWalker.java index 1d3debb48..45bd14d4e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/RefWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/RefWalker.java @@ -8,7 +8,7 @@ package org.broadinstitute.sting.gatk.walkers; * To change this template use File | Settings | File Templates. */ @By(DataSource.REFERENCE) -@Requires({DataSource.REFERENCE, DataSource.REFERENCE_BASES}) +@Requires({DataSource.REFERENCE}) @Allows(DataSource.REFERENCE) public abstract class RefWalker extends LocusWalker { } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index 3f35cf8e8..e45cad971 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -107,7 +107,7 @@ import java.util.ArrayList; @BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN) @By(DataSource.READS) @ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file -@Requires({DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES}) // filter out all reads with zero or unavailable mapping quality +@Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality @PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta public class BaseRecalibrator extends LocusWalker implements TreeReducible { @ArgumentCollection From d16cb68539af7011dfafe55f3eec277563b5ac9d Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 17 Aug 2012 14:20:15 -0400 Subject: [PATCH 167/176] Updated and more thorough version of the BadCigar read filter * No reads with Hard/Soft clips in the middle of the cigar * No reads starting with deletions (with or without preceding clips) * No reads ending in deletions (with or without follow-up clips) * No reads that are fully hard or soft clipped * No reads that have consecutive indels in the cigar (II, DD, ID or DI) Also added systematic test for good cigars and iterative test for bad cigars. --- .../sting/gatk/filters/BadCigarFilter.java | 94 +++++++++++---- .../gatk/filters/BadCigarFilterUnitTest.java | 74 ++++++------ .../utils/clipping/ReadClipperTestUtils.java | 109 +++++++++++++++--- 3 files changed, 210 insertions(+), 67 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/BadCigarFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/BadCigarFilter.java index 9a1455859..cda7392ae 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/BadCigarFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/BadCigarFilter.java @@ -29,9 +29,19 @@ import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; +import java.util.Iterator; + /** * Filter out reads with wonky cigar strings. * + * - No reads with Hard/Soft clips in the middle of the cigar + * - No reads starting with deletions (with or without preceding clips) + * - No reads ending in deletions (with or without follow-up clips) + * - No reads that are fully hard or soft clipped + * - No reads that have consecutive indels in the cigar (II, DD, ID or DI) + * + * ps: apparently an empty cigar is okay... + * * @author ebanks * @version 0.1 */ @@ -40,28 +50,72 @@ public class BadCigarFilter extends ReadFilter { public boolean filterOut(final SAMRecord rec) { final Cigar c = rec.getCigar(); - if( c.isEmpty() ) { return false; } // if there is no Cigar then it can't be bad - boolean previousElementWasIndel = false; - CigarOperator lastOp = c.getCigarElement(0).getOperator(); - - if (lastOp == CigarOperator.D) // filter out reads starting with deletion - return true; - - for (CigarElement ce : c.getCigarElements()) { - CigarOperator op = ce.getOperator(); - if (op == CigarOperator.D || op == CigarOperator.I) { - if (previousElementWasIndel) - return true; // filter out reads with adjacent I/D - - previousElementWasIndel = true; - } - else // this is a regular base (match/mismatch/hard or soft clip) - previousElementWasIndel = false; // reset the previous element - - lastOp = op; + // if there is no Cigar then it can't be bad + if( c.isEmpty() ) { + return false; } - return lastOp == CigarOperator.D; + Iterator elementIterator = c.getCigarElements().iterator(); + + CigarOperator firstOp = CigarOperator.H; + while (elementIterator.hasNext() && (firstOp == CigarOperator.H || firstOp == CigarOperator.S)) { + CigarOperator op = elementIterator.next().getOperator(); + + // No reads with Hard/Soft clips in the middle of the cigar + if (firstOp != CigarOperator.H && op == CigarOperator.H) { + return true; + } + firstOp = op; + } + + // No reads starting with deletions (with or without preceding clips) + if (firstOp == CigarOperator.D) { + return true; + } + + boolean hasMeaningfulElements = (firstOp != CigarOperator.H && firstOp != CigarOperator.S); + boolean previousElementWasIndel = firstOp == CigarOperator.I; + CigarOperator lastOp = firstOp; + CigarOperator previousOp = firstOp; + + while (elementIterator.hasNext()) { + CigarOperator op = elementIterator.next().getOperator(); + + if (op != CigarOperator.S && op != CigarOperator.H) { + + // No reads with Hard/Soft clips in the middle of the cigar + if (previousOp == CigarOperator.S || previousOp == CigarOperator.H) + return true; + + lastOp = op; + + if (!hasMeaningfulElements && op.consumesReadBases()) { + hasMeaningfulElements = true; + } + + if (op == CigarOperator.I || op == CigarOperator.D) { + + // No reads that have consecutive indels in the cigar (II, DD, ID or DI) + if (previousElementWasIndel) { + return true; + } + previousElementWasIndel = true; + } + else { + previousElementWasIndel = false; + } + } + // No reads with Hard/Soft clips in the middle of the cigar + else if (op == CigarOperator.S && previousOp == CigarOperator.H) { + return true; + } + + previousOp = op; + } + + // No reads ending in deletions (with or without follow-up clips) + // No reads that are fully hard or soft clipped + return lastOp == CigarOperator.D || !hasMeaningfulElements; } } \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/filters/BadCigarFilterUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/filters/BadCigarFilterUnitTest.java index 333d35641..ff918db68 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/filters/BadCigarFilterUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/filters/BadCigarFilterUnitTest.java @@ -1,11 +1,14 @@ package org.broadinstitute.sting.gatk.filters; -import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import net.sf.samtools.Cigar; +import org.broadinstitute.sting.utils.clipping.ReadClipperTestUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; +import java.util.List; + /** * Checks that the Bad Cigar filter works for all kinds of wonky cigars * @@ -14,6 +17,29 @@ import org.testng.annotations.Test; */ public class BadCigarFilterUnitTest { + public static final String[] BAD_CIGAR_LIST = { + "2D4M", // starting with multiple deletions + "4M2D", // ending with multiple deletions + "3M1I1D", // adjacent indels AND ends in deletion + "1M1I1D2M", // adjacent indels I->D + "1M1D2I1M", // adjacent indels D->I + "1M1I2M1D", // ends in single deletion with insertion in the middle + "4M1D", // ends in single deletion + "1D4M", // starts with single deletion + "2M1D1D2M", // adjacent D's + "1M1I1I1M", // adjacent I's + "1H1D4M", // starting with deletion after H + "1S1D3M", // starting with deletion after S + "1H1S1D3M", // starting with deletion after HS + "4M1D1H", // ending with deletion before H + "3M1D1S", // ending with deletion before S + "3M1D1S1H", // ending with deletion before HS + "10M2H10M", // H in the middle + "10M2S10M", // S in the middle + "1H1S10M2S10M1S1H", // deceiving S in the middle + "1H1S10M2H10M1S1H" // deceiving H in the middle + }; + BadCigarFilter filter; @BeforeClass @@ -21,40 +47,20 @@ public class BadCigarFilterUnitTest { filter = new BadCigarFilter(); } - @Test + @Test(enabled = true) public void testWonkyCigars () { - byte[] bases = {'A', 'A', 'A', 'A'}; - byte[] quals = {30, 30, 30, 30}; - GATKSAMRecord read; - // starting with multiple deletions - read = ArtificialSAMUtils.createArtificialRead(bases, quals, "2D4M"); - Assert.assertTrue(filter.filterOut(read), read.getCigarString()); + for (String cigarString : BAD_CIGAR_LIST) { + GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigarString); + Assert.assertTrue(filter.filterOut(read), read.getCigarString()); + } + } - read = ArtificialSAMUtils.createArtificialRead(bases, quals, "4M2D"); // ending with multiple deletions - Assert.assertTrue(filter.filterOut(read), read.getCigarString()); - - read = ArtificialSAMUtils.createArtificialRead(bases, quals, "3M1I1D"); // adjacent indels AND ends in deletion - Assert.assertTrue(filter.filterOut(read), read.getCigarString()); - - read = ArtificialSAMUtils.createArtificialRead(bases, quals, "1M1I1D2M"); // adjacent indels I->D - Assert.assertTrue(filter.filterOut(read), read.getCigarString()); - - read = ArtificialSAMUtils.createArtificialRead(bases, quals, "1M1D2I1M"); // adjacent indels D->I - Assert.assertTrue(filter.filterOut(read), read.getCigarString()); - - read = ArtificialSAMUtils.createArtificialRead(bases, quals, "1M1I2M1D"); // ends in single deletion with insertion in the middle - Assert.assertTrue(filter.filterOut(read), read.getCigarString()); - - read = ArtificialSAMUtils.createArtificialRead(bases, quals, "4M1D"); // ends in single deletion - Assert.assertTrue(filter.filterOut(read), read.getCigarString()); - - read = ArtificialSAMUtils.createArtificialRead(bases, quals, "1D4M"); // starts with single deletion - Assert.assertTrue(filter.filterOut(read), read.getCigarString()); - - read = ArtificialSAMUtils.createArtificialRead(bases, quals, "2M1D1D2M"); // adjacent D's - Assert.assertTrue(filter.filterOut(read), read.getCigarString()); - - read = ArtificialSAMUtils.createArtificialRead(bases, quals, "1M1I1I1M"); // adjacent I's - Assert.assertTrue(filter.filterOut(read), read.getCigarString()); + @Test(enabled = true) + public void testGoodCigars() { + List cigarList = ReadClipperTestUtils.generateCigarList(10); + for (Cigar cigar : cigarList) { + GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); + Assert.assertFalse(filter.filterOut(read), read.getCigarString()); + } } } diff --git a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java index baa2f6218..208c14fbd 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java @@ -4,6 +4,7 @@ import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.Assert; @@ -37,17 +38,22 @@ public class ReadClipperTestUtils { return ArtificialSAMUtils.createArtificialRead(Utils.arrayFromArrayWithLength(BASES, cigar.getReadLength()), Utils.arrayFromArrayWithLength(QUALS, cigar.getReadLength()), cigar.toString()); } - /** - * This function generates every valid permutation of cigar strings with a given length. - * - * A valid cigar object obeys the following rules: - * - No Hard/Soft clips in the middle of the read - * - No deletions in the beginning / end of the read - * - No repeated adjacent element (e.g. 1M2M -> this should be 3M) - * - * @param maximumLength the maximum number of elements in the cigar - * @return a list with all valid Cigar objects - */ + public static GATKSAMRecord makeReadFromCigar(String cigarString) { + return makeReadFromCigar(cigarFromString(cigarString)); + } + + /** + * This function generates every valid permutation of cigar strings with a given length. + * + * A valid cigar object obeys the following rules: + * - No Hard/Soft clips in the middle of the read + * - No deletions in the beginning / end of the read + * - No repeated adjacent element (e.g. 1M2M -> this should be 3M) + * - No consecutive I/D elements + * + * @param maximumLength the maximum number of elements in the cigar + * @return a list with all valid Cigar objects + */ public static List generateCigarList(int maximumLength) { int numCigarElements = cigarElements.length; LinkedList cigarList = new LinkedList(); @@ -137,7 +143,10 @@ public class ReadClipperTestUtils { CigarElement lastElement = null; int lastElementLength = 0; for (CigarElement cigarElement : rawCigar.getCigarElements()) { - if (lastElement != null && lastElement.getOperator() == cigarElement.getOperator()) + if (lastElement != null && + ((lastElement.getOperator() == cigarElement.getOperator()) || + (lastElement.getOperator() == CigarOperator.I && cigarElement.getOperator() == CigarOperator.D) || + (lastElement.getOperator() == CigarOperator.D && cigarElement.getOperator() == CigarOperator.I))) lastElementLength += cigarElement.getLength(); else { @@ -191,7 +200,7 @@ public class ReadClipperTestUtils { /** * Checks whether or not the read has any cigar element that is not H or S * - * @param read + * @param read the read * @return true if it has any M, I or D, false otherwise */ public static boolean readHasNonClippedBases(GATKSAMRecord read) { @@ -201,5 +210,79 @@ public class ReadClipperTestUtils { return false; } + public static Cigar cigarFromString(String cigarString) { + Cigar cigar = new Cigar(); + + boolean isNumber = false; + int number = 0; + for (int i = 0; i < cigarString.length(); i++) { + char x = cigarString.charAt(i); + + if (x >= '0' && x <='9') { + if (isNumber) { + number *= 10; + } + else { + isNumber = true; + } + number += x - '0'; + } + + else { + CigarElement e; + switch (x) { + case 'M': + case 'm': + e = new CigarElement(number, CigarOperator.M); + break; + + case 'I': + case 'i': + e = new CigarElement(number, CigarOperator.I); + break; + + case 'D': + case 'd': + e = new CigarElement(number, CigarOperator.D); + break; + + case 'S': + case 's': + e = new CigarElement(number, CigarOperator.S); + break; + + case 'N': + case 'n': + e = new CigarElement(number, CigarOperator.N); + break; + + case 'H': + case 'h': + e = new CigarElement(number, CigarOperator.H); + break; + + case 'P': + case 'p': + e = new CigarElement(number, CigarOperator.P); + break; + + case '=': + e = new CigarElement(number, CigarOperator.EQ); + break; + + case 'X': + case 'x': + e = new CigarElement(number, CigarOperator.X); + break; + + default: + throw new ReviewedStingException("Unrecognized cigar operator: " + x + " (number: " + number + ")"); + } + cigar.add(e); + } + } + return cigar; + } + } From 9121b9816741fd597321a587f3fa9d18c24e0ed4 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sun, 19 Aug 2012 10:29:38 -0400 Subject: [PATCH 170/176] CombineVariants outputs the first non-MISSING qual, not the maximum -- When merging multiple VCF records at a site, the combined VCF record has the QUAL of the first VCF record with a non-MISSING QUAL value. The previous behavior was to take the max QUAL, which resulted in sometime strange downstream confusion. --- .../sting/gatk/walkers/variantutils/CombineVariants.java | 7 +++++++ .../sting/utils/variantcontext/VariantContextUtils.java | 6 ++++-- .../variantutils/CombineVariantsIntegrationTest.java | 6 +++--- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 8dabd49b8..555999bdb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -73,6 +73,13 @@ import java.util.*; * efficiency. However, since this merge runs in only one thread, you can quickly reach diminishing * returns with the number of parallel threads. -nt 4 works well but -nt 8 may be too much. * + * Some fine details about the merging algorithm: + *
    + *
  • As of GATK 2.1, when merging multiple VCF records at a site, the combined VCF record has the QUAL of + * the first VCF record with a non-MISSING QUAL value. The previous behavior was to take the + * max QUAL, which resulted in sometime strange downstream confusion
  • + *
+ * *

Input

*

* One or more variant sets to combine. diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index ff6b0be70..d7e4a7135 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -514,7 +514,7 @@ public class VariantContextUtils { int depth = 0; int maxAC = -1; final Map attributesWithMaxAC = new LinkedHashMap(); - double log10PError = 1; + double log10PError = CommonInfo.NO_LOG10_PERROR; VariantContext vcWithMaxAC = null; GenotypesContext genotypes = GenotypesContext.create(); @@ -542,7 +542,9 @@ public class VariantContextUtils { mergeGenotypes(genotypes, vc, alleleMapping, genotypeMergeOptions == GenotypeMergeType.UNIQUIFY); - log10PError = Math.min(log10PError, vc.isVariant() ? vc.getLog10PError() : 1); + // We always take the QUAL of the first VC with a non-MISSING qual for the combined value + if ( log10PError == CommonInfo.NO_LOG10_PERROR ) + log10PError = vc.getLog10PError(); filters.addAll(vc.getFilters()); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index 9ea751b72..c32d77f82 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -113,13 +113,13 @@ public class CombineVariantsIntegrationTest extends WalkerTest { @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "4efdf983918db822e4ac13d911509576"); } // official project VCF files in tabix format @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "848d4408ee953053d2307cefebc6bd6d"); } // official project VCF files in tabix format - @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "5d04f22ef88ed9226cbd7b4483c5cb23"); } + @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "629656bfef7713c23f3a593523503b2f"); } @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e54d0dcf14f90d5c8e58b45191dd0219"); } @Test public void uniqueSNPs() { // parallelism must be disabled because the input VCF is malformed (DB=0) and parallelism actually fixes this which breaks the md5s - combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "acc70f33be741b564f7be9aa3f819dd4", true); + combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "e5ea6ac3905bd9eeea1a2ef5d2cb5af7", true); } @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "def52bcd3942bbe39cd7ebe845c4f206"); } @@ -137,7 +137,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" + " -genotypeMergeOptions UNIQUIFY -L 1"), 1, - Arrays.asList("3039cfff7abee6aa7fbbafec66a1b019")); + Arrays.asList("e5f0e7a80cd392172ebf5ddb06b91a00")); cvExecuteTest("threeWayWithRefs", spec, true); } From 7fa76f719b7f1c348d6f8d70bad3ae03bda42e09 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sun, 19 Aug 2012 10:32:55 -0400 Subject: [PATCH 171/176] Print "Parsing data stream with BCF version BCFx.y" in BCF2 codec as .debug not .info --- .../org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java index 60fcb6585..c221b8fba 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java @@ -149,7 +149,7 @@ public final class BCF2Codec implements FeatureCodec { if ( bcfVersion.getMinorVersion() < MIN_MINOR_VERSION ) error("BCF2Codec can only process BCF2 files with minor version >= " + MIN_MINOR_VERSION + " but this file has minor version " + bcfVersion.getMinorVersion()); - logger.info("Parsing data stream with BCF version " + bcfVersion); + logger.debug("Parsing data stream with BCF version " + bcfVersion); final int headerSizeInBytes = BCF2Type.INT32.read(inputStream); From 97b191f5787638223d858f6f6cdd6099d0166396 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 20 Aug 2012 01:16:23 -0400 Subject: [PATCH 172/176] Thanks to Guillermo I was able to isolate an instance of where the MLEAC > AN. It turns out that this is valid, e.g. when PLs are all 0s for a sample we no-call it but it's allowed to factor into the MLE (since that's the contract with the exact model). Removing the check in UG and instead protecting for it in the AlleleCount stratification. --- .../gatk/walkers/genotyper/UnifiedGenotyperEngine.java | 10 ++-------- .../varianteval/stratifications/AlleleCount.java | 5 +++-- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 67ade390f..3d9724ffb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -444,16 +444,10 @@ public class UnifiedGenotyperEngine { if ( alleleCountsofMLE.size() > 0 ) { attributes.put(VCFConstants.MLE_ALLELE_COUNT_KEY, alleleCountsofMLE); final int AN = builder.make().getCalledChrCount(); - - // let's sanity check that we don't have an invalid MLE value in there - for ( int MLEAC : alleleCountsofMLE ) { - if ( MLEAC > AN ) - throw new ReviewedStingException(String.format("MLEAC value (%d) is larger than AN (%d) at position %s:%d", MLEAC, AN, loc.getContig(), loc.getStart())); - } - final ArrayList MLEfrequencies = new ArrayList(alleleCountsofMLE.size()); + // the MLEAC is allowed to be larger than the AN (e.g. in the case of all PLs being 0, the GT is ./. but the exact model may arbitrarily choose an AC>1) for ( int AC : alleleCountsofMLE ) - MLEfrequencies.add((double)AC / (double)AN); + MLEfrequencies.add(Math.min(1.0, (double)AC / (double)AN)); attributes.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, MLEfrequencies); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java index 00a593768..2b1bd9c62 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java @@ -46,7 +46,8 @@ public class AlleleCount extends VariantStratifier { int AC = 0; // by default, the site is considered monomorphic if ( eval.hasAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY) && eval.isBiallelic() ) { - AC = eval.getAttributeAsInt(VCFConstants.MLE_ALLELE_COUNT_KEY, 0); + // the MLEAC is allowed to be larger than the AN (e.g. in the case of all PLs being 0, the GT is ./. but the exact model may arbitrarily choose an AC>1) + AC = Math.min(eval.getAttributeAsInt(VCFConstants.MLE_ALLELE_COUNT_KEY, 0), nchrom); } else if ( eval.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) && eval.isBiallelic() ) { AC = eval.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY, 0); } else if ( eval.isVariant() ) { @@ -56,7 +57,7 @@ public class AlleleCount extends VariantStratifier { // make sure that the AC isn't invalid if ( AC > nchrom ) - throw new UserException.MalformedVCF(String.format("The AC or MLEAC value (%d) at position %s:%d " + + throw new UserException.MalformedVCF(String.format("The AC value (%d) at position %s:%d " + "is larger than the number of chromosomes over all samples (%d)", AC, eval.getChr(), eval.getStart(), nchrom)); From 154f65e0dec1e75fc318b501014e65b2f833a73c Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 20 Aug 2012 12:43:17 -0400 Subject: [PATCH 173/176] Temporarily disabling multi-threaded usage of BaseRecalibrator for performance reasons. --- .../sting/gatk/walkers/bqsr/BQSRIntegrationTest.java | 9 +++++---- .../sting/gatk/walkers/bqsr/BaseRecalibrator.java | 4 ++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java index bd75806dd..580667ee2 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java @@ -75,10 +75,11 @@ public class BQSRIntegrationTest extends WalkerTest { Arrays.asList(params.md5)); executeTest("testBQSR-"+params.args, spec).getFirst(); - WalkerTestSpec specNT2 = new WalkerTestSpec( - params.getCommandLine() + " -nt 2", - Arrays.asList(params.md5)); - executeTest("testBQSR-nt2-"+params.args, specNT2).getFirst(); + // TODO -- re-enable once parallelization is fixed in BaseRecalibrator + //WalkerTestSpec specNT2 = new WalkerTestSpec( + // params.getCommandLine() + " -nt 2", + // Arrays.asList(params.md5)); + //executeTest("testBQSR-nt2-"+params.args, specNT2).getFirst(); } @Test diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index e45cad971..91d982f20 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -136,6 +136,10 @@ public class BaseRecalibrator extends LocusWalker implements TreeRed */ public void initialize() { + // TODO -- remove me after the 2.1 release + if ( getToolkit().getArguments().numberOfThreads > 1 ) + throw new UserException("We have temporarily disabled the ability to run BaseRecalibrator multi-threaded for performance reasons. We hope to have this fixed for the next GATK release (2.2) and apologize for the inconvenience."); + // check for unsupported access if (getToolkit().isGATKLite() && !getToolkit().getArguments().disableIndelQuals) throw new UserException.NotSupportedInGATKLite("base insertion/deletion recalibration is not supported, please use the --disable_indel_quals argument"); From c67d708c5173ea0f960e60f381b962b046faa12d Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Mon, 20 Aug 2012 13:41:08 -0400 Subject: [PATCH 174/176] Bug fix in HaplotypeCaller for non-regular bases in the reference or reads. Those events don't get created any more. Bug fix for advanced GenotypeFullActiveRegion mode: custom variant annotations created by the HC don't make sense when in this mode so don't try to calculate them. --- .../haplotypecaller/GenotypingEngine.java | 47 ++++++++++---- .../haplotypecaller/HaplotypeCaller.java | 61 ++++++++++--------- .../LikelihoodCalculationEngine.java | 1 - .../genotyper/UnifiedArgumentCollection.java | 1 - .../broadinstitute/sting/utils/BaseUtils.java | 11 +++- 5 files changed, 76 insertions(+), 45 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index 6afdc58ea..c56cf5bf2 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -56,8 +56,12 @@ public class GenotypingEngine { // This function is the streamlined approach, currently not being used @Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"}) - public List>>> assignGenotypeLikelihoodsAndCallHaplotypeEvents( final UnifiedGenotyperEngine UG_engine, final ArrayList haplotypes, final byte[] ref, final GenomeLoc refLoc, - final GenomeLoc activeRegionWindow, final GenomeLocParser genomeLocParser ) { + public List>>> assignGenotypeLikelihoodsAndCallHaplotypeEvents( final UnifiedGenotyperEngine UG_engine, + final ArrayList haplotypes, + final byte[] ref, + final GenomeLoc refLoc, + final GenomeLoc activeRegionWindow, + final GenomeLocParser genomeLocParser ) { // Prepare the list of haplotype indices to genotype final ArrayList allelesToGenotype = new ArrayList(); @@ -224,7 +228,6 @@ public class GenotypingEngine { } } - // Walk along each position in the key set and create each event to be outputted for( final int loc : startPosKeySet ) { if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) { @@ -533,24 +536,36 @@ public class GenotypingEngine { final int elementLength = ce.getLength(); switch( ce.getOperator() ) { case I: + { final ArrayList insertionAlleles = new ArrayList(); final int insertionStart = refLoc.getStart() + refPos - 1; - insertionAlleles.add( Allele.create(ref[refPos-1], true) ); + final byte refByte = ref[refPos-1]; + if( BaseUtils.isRegularBase(refByte) ) { + insertionAlleles.add( Allele.create(refByte, true) ); + } if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) { insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE ); } else { byte[] insertionBases = new byte[]{}; insertionBases = ArrayUtils.add(insertionBases, ref[refPos-1]); // add the padding base insertionBases = ArrayUtils.addAll(insertionBases, Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength )); - insertionAlleles.add( Allele.create(insertionBases, false) ); + if( BaseUtils.isAllRegularBases(insertionBases) ) { + insertionAlleles.add( Allele.create(insertionBases, false) ); + } + } + if( insertionAlleles.size() == 2 ) { // found a proper ref and alt allele + vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make()); } - vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make()); alignmentPos += elementLength; break; + } case S: + { alignmentPos += elementLength; break; + } case D: + { final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base final ArrayList deletionAlleles = new ArrayList(); final int deletionStart = refLoc.getStart() + refPos - 1; @@ -561,15 +576,20 @@ public class GenotypingEngine { // deletionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE ); // vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart, deletionAlleles).make()); //} else { + final byte refByte = ref[refPos-1]; + if( BaseUtils.isRegularBase(refByte) && BaseUtils.isAllRegularBases(deletionBases) ) { deletionAlleles.add( Allele.create(deletionBases, true) ); - deletionAlleles.add( Allele.create(ref[refPos-1], false) ); + deletionAlleles.add( Allele.create(refByte, false) ); vcs.put(deletionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).make()); + } //} refPos += elementLength; break; + } case M: case EQ: case X: + { int numSinceMismatch = -1; int stopOfMismatch = -1; int startOfMismatch = -1; @@ -592,11 +612,13 @@ public class GenotypingEngine { if( numSinceMismatch > MNP_LOOK_AHEAD || (iii == elementLength - 1 && stopOfMismatch != -1) ) { final byte[] refBases = Arrays.copyOfRange( ref, refPosStartOfMismatch, refPosStartOfMismatch + (stopOfMismatch - startOfMismatch) + 1 ); final byte[] mismatchBases = Arrays.copyOfRange( alignment, startOfMismatch, stopOfMismatch + 1 ); - final ArrayList snpAlleles = new ArrayList(); - snpAlleles.add( Allele.create( refBases, true ) ); - snpAlleles.add( Allele.create( mismatchBases, false ) ); - final int snpStart = refLoc.getStart() + refPosStartOfMismatch; - vcs.put(snpStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), snpStart, snpStart + (stopOfMismatch - startOfMismatch), snpAlleles).make()); + if( BaseUtils.isAllRegularBases(refBases) && BaseUtils.isAllRegularBases(mismatchBases) ) { + final ArrayList snpAlleles = new ArrayList(); + snpAlleles.add( Allele.create( refBases, true ) ); + snpAlleles.add( Allele.create( mismatchBases, false ) ); + final int snpStart = refLoc.getStart() + refPosStartOfMismatch; + vcs.put(snpStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), snpStart, snpStart + (stopOfMismatch - startOfMismatch), snpAlleles).make()); + } numSinceMismatch = -1; stopOfMismatch = -1; startOfMismatch = -1; @@ -606,6 +628,7 @@ public class GenotypingEngine { alignmentPos++; } break; + } case N: case H: case P: diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index ec4fb3950..502711bb3 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -413,45 +413,48 @@ public class HaplotypeCaller extends ActiveRegionWalker implem for( final Pair>> callResult : ( GENOTYPE_FULL_ACTIVE_REGION && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES - ? genotypingEngine.assignGenotypeLikelihoodsAndCallHaplotypeEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser() ) + ? genotypingEngine.assignGenotypeLikelihoodsAndCallHaplotypeEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getExtendedLoc(), getToolkit().getGenomeLocParser() ) : genotypingEngine.assignGenotypeLikelihoodsAndCallIndependentEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser(), activeAllelesToGenotype ) ) ) { if( DEBUG ) { System.out.println(callResult.getFirst().toStringWithoutGenotypes()); } final Map>> stratifiedReadMap = LikelihoodCalculationEngine.partitionReadsBasedOnLikelihoods( getToolkit().getGenomeLocParser(), perSampleReadList, perSampleFilteredReadList, callResult ); final VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, callResult.getFirst()); - - // add some custom annotations to the calls final Map myAttributes = new LinkedHashMap(annotatedCall.getAttributes()); - // Calculate the number of variants on the haplotype - int maxNumVar = 0; - for( final Allele allele : callResult.getFirst().getAlleles() ) { - if( !allele.isReference() ) { - for( final Haplotype haplotype : callResult.getSecond().get(allele) ) { - final int numVar = haplotype.getEventMap().size(); - if( numVar > maxNumVar ) { maxNumVar = numVar; } + + if( !GENOTYPE_FULL_ACTIVE_REGION ) { + // add some custom annotations to the calls + + // Calculate the number of variants on the haplotype + int maxNumVar = 0; + for( final Allele allele : callResult.getFirst().getAlleles() ) { + if( !allele.isReference() ) { + for( final Haplotype haplotype : callResult.getSecond().get(allele) ) { + final int numVar = haplotype.getEventMap().size(); + if( numVar > maxNumVar ) { maxNumVar = numVar; } + } } } - } - // Calculate the event length - int maxLength = 0; - for ( final Allele a : annotatedCall.getAlternateAlleles() ) { - final int length = a.length() - annotatedCall.getReference().length(); - if( Math.abs(length) > Math.abs(maxLength) ) { maxLength = length; } - } + // Calculate the event length + int maxLength = 0; + for ( final Allele a : annotatedCall.getAlternateAlleles() ) { + final int length = a.length() - annotatedCall.getReference().length(); + if( Math.abs(length) > Math.abs(maxLength) ) { maxLength = length; } + } - myAttributes.put("NVH", maxNumVar); - myAttributes.put("NumHapEval", bestHaplotypes.size()); - myAttributes.put("NumHapAssembly", haplotypes.size()); - myAttributes.put("ActiveRegionSize", activeRegion.getLocation().size()); - myAttributes.put("EVENTLENGTH", maxLength); - myAttributes.put("TYPE", (annotatedCall.isSNP() || annotatedCall.isMNP() ? "SNP" : "INDEL") ); - myAttributes.put("extType", annotatedCall.getType().toString() ); + myAttributes.put("NVH", maxNumVar); + myAttributes.put("NumHapEval", bestHaplotypes.size()); + myAttributes.put("NumHapAssembly", haplotypes.size()); + myAttributes.put("ActiveRegionSize", activeRegion.getLocation().size()); + myAttributes.put("EVENTLENGTH", maxLength); + myAttributes.put("TYPE", (annotatedCall.isSNP() || annotatedCall.isMNP() ? "SNP" : "INDEL") ); + myAttributes.put("extType", annotatedCall.getType().toString() ); - //if( likelihoodCalculationEngine.haplotypeScore != null ) { - // myAttributes.put("HaplotypeScore", String.format("%.4f", likelihoodCalculationEngine.haplotypeScore)); - //} - if( annotatedCall.hasAttribute("QD") ) { - myAttributes.put("QDE", String.format("%.2f", Double.parseDouble((String)annotatedCall.getAttribute("QD")) / ((double)maxNumVar)) ); + //if( likelihoodCalculationEngine.haplotypeScore != null ) { + // myAttributes.put("HaplotypeScore", String.format("%.4f", likelihoodCalculationEngine.haplotypeScore)); + //} + if( annotatedCall.hasAttribute("QD") ) { + myAttributes.put("QDE", String.format("%.2f", Double.parseDouble((String)annotatedCall.getAttribute("QD")) / ((double)maxNumVar)) ); + } } vcfWriter.add( new VariantContextBuilder(annotatedCall).attributes(myAttributes).make() ); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index fabf5633f..b5ce4b4bc 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -179,7 +179,6 @@ public class LikelihoodCalculationEngine { final double[] readLikelihoods_jjj = jjj_mapped.getReadLikelihoods(sample); for( int kkk = 0; kkk < readLikelihoods_iii.length; kkk++ ) { // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2) - // log10(10^(a*x1) + 10^(b*x2)) ??? // First term is approximated by Jacobian log with table lookup. haplotypeLikelihood += readCounts_iii[kkk] * ( MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF ); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index e755a1e36..f11f1e599 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -181,7 +181,6 @@ public class UnifiedArgumentCollection { Generalized ploidy argument (debug only): When building site error models, ignore lane information and build only sample-level error model */ - @Argument(fullName = "ignoreLaneInfo", shortName = "ignoreLane", doc = "Ignore lane when building error model, error model is then per-site", required = false) public boolean IGNORE_LANE_INFO = false; diff --git a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java index 13571df78..2d7f51c3f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java @@ -227,14 +227,21 @@ public class BaseUtils { } @Deprecated - static public boolean isRegularBase(char base) { + static public boolean isRegularBase( final char base ) { return simpleBaseToBaseIndex(base) != -1; } - static public boolean isRegularBase(byte base) { + static public boolean isRegularBase( final byte base ) { return simpleBaseToBaseIndex(base) != -1; } + static public boolean isAllRegularBases( final byte[] bases ) { + for( final byte base : bases) { + if( !isRegularBase(base) ) { return false; } + } + return true; + } + static public boolean isNBase(byte base) { return base == 'N' || base == 'n'; } From 4450d66c647f949da89e9590d2c1e336e82da742 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 20 Aug 2012 15:10:24 -0400 Subject: [PATCH 175/176] Fixing the docs for DP and AD --- .../gatk/walkers/annotator/DepthOfCoverage.java | 17 ++++------------- .../annotator/DepthPerAlleleBySample.java | 12 ++++++------ 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java index 28ca77f18..39b5e84dc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java @@ -22,19 +22,10 @@ import java.util.Map; /** * Total (unfiltered) depth over all samples. * - * This and AD are complementary fields that are two important ways of thinking about the depth of the data for this sample - * at this site. The DP field describe the total depth of reads that passed the Unified Genotypers internal - * quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site. - * The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the - * REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the - * power I have to determine the genotype of the sample at this site, while the AD tells me how many times - * I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering - * the reads. If, for example, I believe there really is a an A/T polymorphism at a site, then I would like - * to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would - * normally be excluded from the statistical calculations going into GQ and QUAL. - * - * Note that the DP is affected by downsampling (-dcov) though, so the max value one can obtain for N samples with - * -dcov D is N * D + * While the sample-level (FORMAT) DP field describes the total depth of reads that passed the Unified Genotyper's + * internal quality control metrics (like MAPQ > 17, for example), the INFO field DP represents the unfiltered depth + * over all samples. Note though that the DP is affected by downsampling (-dcov), so the max value one can obtain for + * N samples with -dcov D is N * D */ public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index a9edab752..5d83ddd51 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -24,10 +24,10 @@ import java.util.List; /** * The depth of coverage of each VCF allele in this sample. * - * This and DP are complementary fields that are two important ways of thinking about the depth of the data for this sample - * at this site. The DP field describe the total depth of reads that passed the Unified Genotypers internal - * quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site. - * The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the + * The AD and DP are complementary fields that are two important ways of thinking about the depth of the data for this + * sample at this site. While the sample-level (FORMAT) DP field describes the total depth of reads that passed the + * Unified Genotyper's internal quality control metrics (like MAPQ > 17, for example), the AD values (one for each of + * REF and ALT fields) is the unfiltered count of all reads that carried with them the * REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the * power I have to determine the genotype of the sample at this site, while the AD tells me how many times * I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering @@ -35,10 +35,10 @@ import java.util.List; * to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would * normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that * the AD isn't necessarily calculated exactly for indels (it counts as non-reference only those indels that - * are actually present and correctly left-aligned in the alignments themselves). Because of this fact and + * are unambiguously informative about the alternate allele). Because of this fact and * because the AD includes reads and bases that were filtered by the Unified Genotyper, one should not base * assumptions about the underlying genotype based on it; instead, the genotype likelihoods (PLs) are what - * determine the genotype calls (see below). + * determine the genotype calls. */ public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation { From 464d49509a7f3acba38b89b64b7fed8e04972365 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Mon, 20 Aug 2012 15:28:39 -0400 Subject: [PATCH 176/176] Pulling out common caller arguments into its own StandardCallerArgumentCollection base class so that every caller isn't exposed to the unused arguments from every other caller. --- .../haplotypecaller/HaplotypeCaller.java | 5 +- .../StandardCallerArgumentCollection.java | 62 +++++++++++++++++++ .../genotyper/UnifiedArgumentCollection.java | 58 ++++------------- 3 files changed, 78 insertions(+), 47 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 502711bb3..4f434bba6 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; import com.google.java.contract.Ensures; import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection; import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.commandline.*; @@ -189,7 +190,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem protected String[] annotationClassesToUse = { "Standard" }; @ArgumentCollection - private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection(); + private StandardCallerArgumentCollection SCAC = new StandardCallerArgumentCollection(); // the calculation arguments private UnifiedGenotyperEngine UG_engine = null; @@ -240,7 +241,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem Set samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); samplesList.addAll( samples ); // initialize the UnifiedGenotyper Engine which is used to call into the exact model - UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP; // the GLmodel isn't used by the HaplotypeCaller but it is dangerous to let the user change this argument + final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection( SCAC ); // this adapter is used so that the full set of unused UG arguments aren't exposed to the HC user UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC.clone(), logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java new file mode 100644 index 000000000..f30fc0316 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java @@ -0,0 +1,62 @@ +package org.broadinstitute.sting.gatk.arguments; + +import org.broadinstitute.sting.commandline.Advanced; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel; +import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +/** + * Created with IntelliJ IDEA. + * User: rpoplin + * Date: 8/20/12 + * A collection of arguments that are common to the various callers. + * This is pulled out so that every caller isn't exposed to the arguments from every other caller. + */ + +public class StandardCallerArgumentCollection { + /** + * The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are: + * het = 1e-3, P(hom-ref genotype) = 1 - 3 * het / 2, P(het genotype) = het, P(hom-var genotype) = het / 2 + */ + @Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false) + public Double heterozygosity = UnifiedGenotyperEngine.HUMAN_SNP_HETEROZYGOSITY; + + @Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false) + public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; + + @Argument(fullName = "output_mode", shortName = "out_mode", doc = "Specifies which type of calls we should output", required = false) + public UnifiedGenotyperEngine.OUTPUT_MODE OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; + + /** + * The minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. Only genotypes with + * confidence >= this threshold are emitted as called sites. A reasonable threshold is 30 for high-pass calling (this + * is the default). + */ + @Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be called", required = false) + public double STANDARD_CONFIDENCE_FOR_CALLING = 30.0; + + /** + * This argument allows you to emit low quality calls as filtered records. + */ + @Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)", required = false) + public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0; + + /** + * When the UnifiedGenotyper is put into GENOTYPE_GIVEN_ALLELES mode it will genotype the samples using only the alleles provide in this rod binding + */ + @Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES", required=false) + public RodBinding alleles; + + /** + * If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES), + * then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it + * scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend + * that you not play around with this parameter. + */ + @Advanced + @Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false) + public int MAX_ALTERNATE_ALLELES = 3; +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index f11f1e599..30c0f3e18 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -26,11 +26,12 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; -public class UnifiedArgumentCollection { +public class UnifiedArgumentCollection extends StandardCallerArgumentCollection { @Argument(fullName = "genotype_likelihoods_model", shortName = "glm", doc = "Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together", required = false) public GenotypeLikelihoodsCalculationModel.Model GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP; @@ -42,13 +43,6 @@ public class UnifiedArgumentCollection { @Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false) protected AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT; - /** - * The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are: - * het = 1e-3, P(hom-ref genotype) = 1 - 3 * het / 2, P(het genotype) = het, P(hom-var genotype) = het / 2 - */ - @Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false) - public Double heterozygosity = UnifiedGenotyperEngine.HUMAN_SNP_HETEROZYGOSITY; - /** * The PCR error rate is independent of the sequencing error rate, which is necessary because we cannot necessarily * distinguish between PCR errors vs. sequencing errors. The practical implication for this value is that it @@ -57,26 +51,6 @@ public class UnifiedArgumentCollection { @Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false) public Double PCR_error = DiploidSNPGenotypeLikelihoods.DEFAULT_PCR_ERROR_RATE; - @Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false) - public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; - - @Argument(fullName = "output_mode", shortName = "out_mode", doc = "Specifies which type of calls we should output", required = false) - public UnifiedGenotyperEngine.OUTPUT_MODE OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; - - /** - * The minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. Only genotypes with - * confidence >= this threshold are emitted as called sites. A reasonable threshold is 30 for high-pass calling (this - * is the default). - */ - @Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be called", required = false) - public double STANDARD_CONFIDENCE_FOR_CALLING = 30.0; - - /** - * This argument allows you to emit low quality calls as filtered records. - */ - @Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)", required = false) - public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0; - /** * Note that calculating the SLOD increases the runtime by an appreciable amount. */ @@ -90,12 +64,6 @@ public class UnifiedArgumentCollection { @Argument(fullName = "annotateNDA", shortName = "nda", doc = "If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site", required = false) public boolean ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = false; - /** - * When the UnifiedGenotyper is put into GENOTYPE_GIVEN_ALLELES mode it will genotype the samples using only the alleles provide in this rod binding - */ - @Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES", required=false) - public RodBinding alleles; - /** * The minimum confidence needed in a given base for it to be used in variant calling. Note that the base quality of a base * is capped by the mapping quality so that bases on reads with low mapping quality may get filtered out depending on this value. @@ -107,16 +75,6 @@ public class UnifiedArgumentCollection { @Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false) public Double MAX_DELETION_FRACTION = 0.05; - /** - * If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES), - * then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it - * scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend - * that you not play around with this parameter. - */ - @Advanced - @Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false) - public int MAX_ALTERNATE_ALLELES = 3; - @Hidden @Argument(fullName = "cap_max_alternate_alleles_for_indels", shortName = "capMaxAltAllelesForIndels", doc = "Cap the maximum number of alternate alleles to genotype for indel calls at 2; overrides the --max_alternate_alleles argument; GSA production use only", required = false) public boolean CAP_MAX_ALTERNATE_ALLELES_FOR_INDELS = false; @@ -139,7 +97,6 @@ public class UnifiedArgumentCollection { @Argument(fullName = "min_indel_fraction_per_sample", shortName = "minIndelFrac", doc = "Minimum fraction of all reads at a locus that must contain an indel (of any allele) for that sample to contribute to the indel count for alleles", required = false) public double MIN_INDEL_FRACTION_PER_SAMPLE = 0.25; - /** * This argument informs the prior probability of having an indel at a site. */ @@ -274,5 +231,16 @@ public class UnifiedArgumentCollection { return uac; } + public UnifiedArgumentCollection() { } + public UnifiedArgumentCollection( final StandardCallerArgumentCollection SCAC ) { + super(); + this.alleles = SCAC.alleles; + this.GenotypingMode = SCAC.GenotypingMode; + this.heterozygosity = SCAC.heterozygosity; + this.MAX_ALTERNATE_ALLELES = SCAC.MAX_ALTERNATE_ALLELES; + this.OutputMode = SCAC.OutputMode; + this.STANDARD_CONFIDENCE_FOR_CALLING = SCAC.STANDARD_CONFIDENCE_FOR_CALLING; + this.STANDARD_CONFIDENCE_FOR_EMITTING = SCAC.STANDARD_CONFIDENCE_FOR_EMITTING; + } }