diff --git a/build.xml b/build.xml index e92e41c10..1e88bb400 100644 --- a/build.xml +++ b/build.xml @@ -708,6 +708,9 @@ + + + diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java index 2b3290595..93bdf8c9d 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java @@ -54,7 +54,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.variant.vcf.VCFHeaderLineType; import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java index 3acba48ae..13969eb54 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java @@ -55,7 +55,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.variant.vcf.VCFHeaderLineType; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index 2df5fefa8..354e508c2 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -60,7 +60,7 @@ import org.broadinstitute.sting.gatk.iterators.ReadTransformer; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.collections.Pair; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java index 8c8de2bad..2baa89999 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java @@ -47,7 +47,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import net.sf.samtools.SAMUtils; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.exceptions.UserException; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoods.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoods.java index aa117eb3b..14bffbc34 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoods.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoods.java @@ -49,9 +49,8 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import net.sf.samtools.SAMUtils; import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACset; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java index 9aa8c13ec..9ea027698 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java @@ -77,7 +77,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.*; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.variant.variantcontext.*; import java.util.*; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java index b3740bbb7..f48ae81cf 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java @@ -51,7 +51,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.PileupElement; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 84c109c9d..5a1bdf9e5 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -52,7 +52,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Haplotype; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 7dc3e8ee3..0652cc236 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -51,7 +51,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.MathUtils; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 8f6097661..19d218023 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -62,7 +62,7 @@ import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.variant.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index 9aeffe966..8b789791d 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -58,7 +58,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.variant.variantcontext.*; import java.io.PrintStream; @@ -84,7 +84,7 @@ public class GenotypingEngine { final List haplotypes, final List samples, final Map haplotypeReadMap, - final Map> perSampleFilteredReadList, + final Map> perSampleFilteredReadList, final byte[] ref, final GenomeLoc refLoc, final GenomeLoc activeRegionWindow, @@ -124,12 +124,12 @@ public class GenotypingEngine { // Walk along each position in the key set and create each event to be outputted for( final int loc : startPosKeySet ) { if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) { // genotyping an event inside this active region - final ArrayList eventsAtThisLoc = new ArrayList(); // the overlapping events to merge into a common reference view - final ArrayList priorityList = new ArrayList(); // used to merge overlapping events into common reference view + final List eventsAtThisLoc = new ArrayList(); // the overlapping events to merge into a common reference view + final List priorityList = new ArrayList(); // used to merge overlapping events into common reference view if( !in_GGA_mode ) { for( final Haplotype h : haplotypes ) { - final HashMap eventMap = h.getEventMap(); + final Map eventMap = h.getEventMap(); final VariantContext vc = eventMap.get(loc); if( vc != null && !containsVCWithMatchingAlleles(eventsAtThisLoc, vc) ) { eventsAtThisLoc.add(vc); @@ -142,7 +142,7 @@ public class GenotypingEngine { if( compVC.getStart() == loc ) { int alleleCount = 0; for( final Allele compAltAllele : compVC.getAlternateAlleles() ) { - ArrayList alleleSet = new ArrayList(2); + List alleleSet = new ArrayList(2); alleleSet.add(compVC.getReference()); alleleSet.add(compAltAllele); final String vcSourceName = "Comp" + compCount + "Allele" + alleleCount; @@ -180,7 +180,7 @@ public class GenotypingEngine { if( eventsAtThisLoc.size() != mergedVC.getAlternateAlleles().size() ) { throw new ReviewedStingException("Record size mismatch! Something went wrong in the merging of alleles."); } - final HashMap mergeMap = new HashMap(); + final Map mergeMap = new HashMap(); mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele for(int iii = 0; iii < mergedVC.getAlternateAlleles().size(); iii++) { mergeMap.put(eventsAtThisLoc.get(iii), mergedVC.getAlternateAllele(iii)); // BUGBUG: This is assuming that the order of alleles is the same as the priority list given to simpleMerge function @@ -232,7 +232,7 @@ public class GenotypingEngine { return genotypes; } - private void validatePriorityList( final ArrayList priorityList, final ArrayList eventsAtThisLoc ) { + private void validatePriorityList( final List priorityList, final List eventsAtThisLoc ) { for( final VariantContext vc : eventsAtThisLoc ) { if( !priorityList.contains(vc.getSource()) ) { throw new ReviewedStingException("Event found on haplotype that wasn't added to priority list. Something went wrong in the merging of alleles."); @@ -251,7 +251,7 @@ public class GenotypingEngine { private static Map filterToOnlyOverlappingReads( final GenomeLocParser parser, final Map perSampleReadMap, - final Map> perSampleFilteredReadList, + final Map> perSampleFilteredReadList, final VariantContext call ) { final Map returnMap = new HashMap(); @@ -284,7 +284,7 @@ public class GenotypingEngine { } protected static void cleanUpSymbolicUnassembledEvents( final List haplotypes ) { - final ArrayList haplotypesToRemove = new ArrayList(); + final List haplotypesToRemove = new ArrayList(); for( final Haplotype h : haplotypes ) { for( final VariantContext vc : h.getEventMap().values() ) { if( vc.isSymbolic() ) { @@ -407,7 +407,7 @@ public class GenotypingEngine { // remove the old event from the eventMap on every haplotype and the start pos key set, replace with merged event for( final Haplotype h : haplotypes ) { - final HashMap eventMap = h.getEventMap(); + final Map eventMap = h.getEventMap(); if( eventMap.containsKey(thisStart) && eventMap.containsKey(nextStart) ) { eventMap.remove(thisStart); eventMap.remove(nextStart); @@ -418,7 +418,7 @@ public class GenotypingEngine { boolean containsStart = false; boolean containsNext = false; for( final Haplotype h : haplotypes ) { - final HashMap eventMap = h.getEventMap(); + final Map eventMap = h.getEventMap(); if( eventMap.containsKey(thisStart) ) { containsStart = true; } if( eventMap.containsKey(nextStart) ) { containsNext = true; } } @@ -457,7 +457,7 @@ public class GenotypingEngine { if( refBases.length == altBases.length ) { // insertion + deletion of same length creates an MNP --> trim common prefix bases off the beginning of the allele while( iii < refBases.length && refBases[iii] == altBases[iii] ) { iii++; } } - final ArrayList mergedAlleles = new ArrayList(); + final List mergedAlleles = new ArrayList(); mergedAlleles.add( Allele.create( ArrayUtils.subarray(refBases, iii, refBases.length), true ) ); mergedAlleles.add( Allele.create( ArrayUtils.subarray(altBases, iii, altBases.length), false ) ); return new VariantContextBuilder("merged", thisVC.getChr(), thisVC.getStart() + iii, nextVC.getEnd(), mergedAlleles).make(); @@ -492,10 +492,10 @@ public class GenotypingEngine { eventMapper.put(new Event(vc), new ArrayList()); } - final ArrayList undeterminedHaplotypes = new ArrayList(haplotypes.size()); + final List undeterminedHaplotypes = new ArrayList(haplotypes.size()); for( final Haplotype h : haplotypes ) { if( h.isArtificialHaplotype() && loc == h.getArtificialAllelePosition() ) { - final ArrayList alleles = new ArrayList(2); + final List alleles = new ArrayList(2); alleles.add(h.getArtificialRefAllele()); alleles.add(h.getArtificialAltAllele()); final Event artificialVC = new Event( (new VariantContextBuilder()).source("artificialHaplotype") @@ -572,13 +572,13 @@ public class GenotypingEngine { } @Ensures({"result.size() == haplotypeAllelesForSample.size()"}) - protected static List findEventAllelesInSample( final List eventAlleles, final List haplotypeAlleles, final List haplotypeAllelesForSample, final ArrayList> alleleMapper, final ArrayList haplotypes ) { + protected static List findEventAllelesInSample( final List eventAlleles, final List haplotypeAlleles, final List haplotypeAllelesForSample, final List> alleleMapper, final List haplotypes ) { if( haplotypeAllelesForSample.contains(Allele.NO_CALL) ) { return noCall; } - final ArrayList eventAllelesForSample = new ArrayList(); + final List eventAllelesForSample = new ArrayList(); for( final Allele a : haplotypeAllelesForSample ) { final Haplotype haplotype = haplotypes.get(haplotypeAlleles.indexOf(a)); for( int iii = 0; iii < alleleMapper.size(); iii++ ) { - final ArrayList mappedHaplotypes = alleleMapper.get(iii); + final List mappedHaplotypes = alleleMapper.get(iii); if( mappedHaplotypes.contains(haplotype) ) { eventAllelesForSample.add(eventAlleles.get(iii)); break; @@ -597,8 +597,8 @@ public class GenotypingEngine { return false; } - protected static HashMap generateVCsFromAlignment( final Haplotype haplotype, final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd ) { - final HashMap vcs = new HashMap(); + protected static Map generateVCsFromAlignment( final Haplotype haplotype, final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd ) { + final Map vcs = new HashMap(); int refPos = alignmentStartHapwrtRef; if( refPos < 0 ) { return null; } // Protection against SW failures @@ -609,7 +609,7 @@ public class GenotypingEngine { switch( ce.getOperator() ) { case I: { - final ArrayList insertionAlleles = new ArrayList(); + final List insertionAlleles = new ArrayList(); final int insertionStart = refLoc.getStart() + refPos - 1; final byte refByte = ref[refPos-1]; if( BaseUtils.isRegularBase(refByte) ) { @@ -639,7 +639,7 @@ public class GenotypingEngine { case D: { final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base - final ArrayList deletionAlleles = new ArrayList(); + final List deletionAlleles = new ArrayList(); final int deletionStart = refLoc.getStart() + refPos - 1; // BUGBUG: how often does this symbolic deletion allele case happen? //if( haplotype != null && ( (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() + elementLength - 1 >= deletionStart && haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() + elementLength - 1 < deletionStart + elementLength) @@ -667,7 +667,7 @@ public class GenotypingEngine { final byte altByte = alignment[alignmentPos]; if( refByte != altByte ) { // SNP! if( BaseUtils.isRegularBase(refByte) && BaseUtils.isRegularBase(altByte) ) { - final ArrayList snpAlleles = new ArrayList(); + final List snpAlleles = new ArrayList(); snpAlleles.add( Allele.create( refByte, true ) ); snpAlleles.add( Allele.create( altByte, false ) ); vcs.put(refLoc.getStart() + refPos, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), refLoc.getStart() + refPos, refLoc.getStart() + refPos, snpAlleles).make()); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index a27eaac8e..027c62e68 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -56,6 +56,7 @@ import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.downsampling.DownsampleType; import org.broadinstitute.sting.gatk.filters.BadMateFilter; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.iterators.ReadTransformer; @@ -132,7 +133,7 @@ import java.util.*; @PartitionBy(PartitionType.LOCUS) @BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN) @ActiveRegionTraversalParameters(extension=65, maxRegion=300) -//@Downsample(by= DownsampleType.BY_SAMPLE, toCoverage=5) +@Downsample(by= DownsampleType.BY_SAMPLE, toCoverage=20) public class HaplotypeCaller extends ActiveRegionWalker implements AnnotatorCompatible { /** @@ -180,9 +181,6 @@ public class HaplotypeCaller extends ActiveRegionWalker implem @Argument(fullName="minKmer", shortName="minKmer", doc="Minimum kmer length to use in the assembly graph", required = false) protected int minKmer = 11; - @Argument(fullName="downsampleRegion", shortName="dr", doc="coverage, per-sample, to downsample each active region to", required = false) - protected int DOWNSAMPLE_PER_SAMPLE_PER_REGION = 1000; - /** * If this flag is provided, the haplotype caller will include unmapped reads in the assembly and calling * when these reads occur in the region being analyzed. Typically, for paired end analyses, one pair of the @@ -276,10 +274,10 @@ public class HaplotypeCaller extends ActiveRegionWalker implem // bases with quality less than or equal to this value are trimmed off the tails of the reads private static final byte MIN_TAIL_QUALITY = 20; - private ArrayList samplesList = new ArrayList(); + private List samplesList = new ArrayList(); private final static double LOG_ONE_HALF = -Math.log10(2.0); private final static double LOG_ONE_THIRD = -Math.log10(3.0); - private final ArrayList allelesToGenotype = new ArrayList(); + private final List allelesToGenotype = new ArrayList(); private final static Allele FAKE_REF_ALLELE = Allele.create("N", true); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file private final static Allele FAKE_ALT_ALLELE = Allele.create("", false); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file @@ -431,7 +429,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem genotypes.add( new GenotypeBuilder(sample.getKey()).alleles(noCall).PL(genotypeLikelihoods).make() ); } - final ArrayList alleles = new ArrayList(); + final List alleles = new ArrayList(); alleles.add( FAKE_REF_ALLELE ); alleles.add( FAKE_ALT_ALLELE ); final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL); @@ -452,7 +450,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem // we're benchmarking ART and/or the active region determination code in the HC, just leave without doing any work return 1; - final ArrayList activeAllelesToGenotype = new ArrayList(); + final List activeAllelesToGenotype = new ArrayList(); if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { for( final VariantContext vc : allelesToGenotype ) { @@ -463,7 +461,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem allelesToGenotype.removeAll( activeAllelesToGenotype ); } - if( !activeRegion.isActive()) { return 0; } // Not active so nothing to do! + if( !activeRegion.isActive() ) { return 0; } // Not active so nothing to do! if( activeRegion.size() == 0 && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { return 0; } // No reads here so nothing to do! if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES && activeAllelesToGenotype.isEmpty() ) { return 0; } // No alleles found in this region so nothing to do! @@ -474,8 +472,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem final Haplotype referenceHaplotype = new Haplotype(activeRegion.getActiveRegionReference(referenceReader), true); // Create the reference haplotype which is the bases from the reference that make up the active region final byte[] fullReferenceWithPadding = activeRegion.getFullReference(referenceReader, REFERENCE_PADDING); - //int PRUNE_FACTOR = Math.max(MIN_PRUNE_FACTOR, determinePruneFactorFromCoverage( activeRegion )); - final ArrayList haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, fullSpanBeforeClipping, MIN_PRUNE_FACTOR, activeAllelesToGenotype ); + final List haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, fullSpanBeforeClipping, MIN_PRUNE_FACTOR, activeAllelesToGenotype ); if( haplotypes.size() == 1 ) { return 1; } // only the reference haplotype remains so nothing else to do! activeRegion.hardClipToActiveRegion(); // only evaluate the parts of reads that are overlapping the active region @@ -487,10 +484,10 @@ public class HaplotypeCaller extends ActiveRegionWalker implem // evaluate each sample's reads against all haplotypes final Map stratifiedReadMap = likelihoodCalculationEngine.computeReadLikelihoods( haplotypes, splitReadsBySample( activeRegion.getReads() ) ); - final Map> perSampleFilteredReadList = splitReadsBySample( filteredReads ); + final Map> perSampleFilteredReadList = splitReadsBySample( filteredReads ); // subset down to only the best haplotypes to be genotyped in all samples ( in GGA mode use all discovered haplotypes ) - final ArrayList bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? + final List bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? likelihoodCalculationEngine.selectBestHaplotypes( haplotypes, stratifiedReadMap, maxNumHaplotypesInPopulation ) : haplotypes ); for( final VariantContext call : genotypingEngine.assignGenotypeLikelihoods( UG_engine, @@ -561,7 +558,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem private void finalizeActiveRegion( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) { if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getLocation() + " with " + activeRegion.size() + " reads: (with overlap region = " + activeRegion.getExtendedLoc() + ")"); } - final ArrayList finalizedReadList = new ArrayList(); + final List finalizedReadList = new ArrayList(); final FragmentCollection fragmentCollection = FragmentUtils.create( activeRegion.getReads() ); activeRegion.clearReads(); @@ -571,18 +568,13 @@ public class HaplotypeCaller extends ActiveRegionWalker implem finalizedReadList.addAll( FragmentUtils.mergeOverlappingPairedFragments(overlappingPair) ); } - Collections.shuffle(finalizedReadList, GenomeAnalysisEngine.getRandomGenerator()); - // Loop through the reads hard clipping the adaptor and low quality tails - final ArrayList readsToUse = new ArrayList(finalizedReadList.size()); + final List readsToUse = new ArrayList(finalizedReadList.size()); for( final GATKSAMRecord myRead : finalizedReadList ) { final GATKSAMRecord postAdapterRead = ( myRead.getReadUnmappedFlag() ? myRead : ReadClipper.hardClipAdaptorSequence( myRead ) ); if( postAdapterRead != null && !postAdapterRead.isEmpty() && postAdapterRead.getCigar().getReadLength() > 0 ) { final GATKSAMRecord clippedRead = ReadClipper.hardClipLowQualEnds( postAdapterRead, MIN_TAIL_QUALITY ); - // protect against INTERVALS with abnormally high coverage - // TODO BUGBUG: remove when positional downsampler is hooked up to ART/HC - if( activeRegion.readOverlapsRegion(clippedRead) && - clippedRead.getReadLength() > 0 && activeRegion.size() < samplesList.size() * DOWNSAMPLE_PER_SAMPLE_PER_REGION ) { + if( activeRegion.readOverlapsRegion(clippedRead) && clippedRead.getReadLength() > 0 ) { readsToUse.add(clippedRead); } } @@ -591,7 +583,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem } private List filterNonPassingReads( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) { - final ArrayList readsToRemove = new ArrayList(); + final List readsToRemove = new ArrayList(); for( final GATKSAMRecord rec : activeRegion.getReads() ) { if( rec.getReadLength() < 24 || rec.getMappingQuality() < 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) { readsToRemove.add(rec); @@ -607,10 +599,10 @@ public class HaplotypeCaller extends ActiveRegionWalker implem return getToolkit().getGenomeLocParser().createGenomeLoc(activeRegion.getReadSpanLoc().getContig(), padLeft, padRight); } - private HashMap> splitReadsBySample( final List reads ) { - final HashMap> returnMap = new HashMap>(); + private Map> splitReadsBySample( final List reads ) { + final Map> returnMap = new HashMap>(); for( final String sample : samplesList) { - ArrayList readList = returnMap.get( sample ); + List readList = returnMap.get( sample ); if( readList == null ) { readList = new ArrayList(); returnMap.put(sample, readList); @@ -711,24 +703,4 @@ public class HaplotypeCaller extends ActiveRegionWalker implem return new Cigar(readCigarElements); } - - /* - private int determinePruneFactorFromCoverage( final ActiveRegion activeRegion ) { - final ArrayList readLengthDistribution = new ArrayList(); - for( final GATKSAMRecord read : activeRegion.getReads() ) { - readLengthDistribution.add(read.getReadLength()); - } - final double meanReadLength = MathUtils.average(readLengthDistribution); - final double meanCoveragePerSample = (double) activeRegion.getReads().size() / ((double) activeRegion.getExtendedLoc().size() / meanReadLength) / (double) samplesList.size(); - int PRUNE_FACTOR = 0; - if( meanCoveragePerSample > 8.5 ) { - PRUNE_FACTOR = (int) Math.floor( Math.sqrt( meanCoveragePerSample - 5.0 ) ); - } else if( meanCoveragePerSample > 3.0 ) { - PRUNE_FACTOR = 1; - } - - if( DEBUG ) { System.out.println(String.format("Mean coverage per sample = %.1f --> prune factor = %d", meanCoveragePerSample, PRUNE_FACTOR)); } - return PRUNE_FACTOR; - } - */ } \ No newline at end of file diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index 57e071189..655b3e529 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -91,11 +91,11 @@ public class LikelihoodCalculationEngine { DEBUG = debug; } - public Map computeReadLikelihoods( final ArrayList haplotypes, final HashMap> perSampleReadList ) { + public Map computeReadLikelihoods( final List haplotypes, final Map> perSampleReadList ) { final Map stratifiedReadMap = new HashMap(); int X_METRIC_LENGTH = 0; - for( final Map.Entry> sample : perSampleReadList.entrySet() ) { + for( final Map.Entry> sample : perSampleReadList.entrySet() ) { for( final GATKSAMRecord read : sample.getValue() ) { final int readLength = read.getReadLength(); if( readLength > X_METRIC_LENGTH ) { X_METRIC_LENGTH = readLength; } @@ -115,7 +115,7 @@ public class LikelihoodCalculationEngine { pairHMM.initialize(X_METRIC_LENGTH, Y_METRIC_LENGTH); // for each sample's reads - for( final Map.Entry> sampleEntry : perSampleReadList.entrySet() ) { + for( final Map.Entry> sampleEntry : perSampleReadList.entrySet() ) { //if( DEBUG ) { System.out.println("Evaluating sample " + sample + " with " + perSampleReadList.get( sample ).size() + " passing reads"); } // evaluate the likelihood of the reads given those haplotypes stratifiedReadMap.put(sampleEntry.getKey(), computeReadLikelihoods(haplotypes, sampleEntry.getValue())); @@ -123,7 +123,7 @@ public class LikelihoodCalculationEngine { return stratifiedReadMap; } - private PerReadAlleleLikelihoodMap computeReadLikelihoods( final ArrayList haplotypes, final ArrayList reads) { + private PerReadAlleleLikelihoodMap computeReadLikelihoods( final List haplotypes, final List reads) { // first, a little set up to get copies of the Haplotypes that are Alleles (more efficient than creating them each time) final int numHaplotypes = haplotypes.size(); final Map alleleVersions = new HashMap(numHaplotypes); @@ -235,72 +235,13 @@ public class LikelihoodCalculationEngine { return likelihoodMatrix; } - /* @Requires({"haplotypes.size() > 0"}) @Ensures({"result.size() <= haplotypes.size()"}) - public ArrayList selectBestHaplotypes( final ArrayList haplotypes ) { - - // BUGBUG: This function needs a lot of work. Need to use 4-gamete test or Tajima's D to decide to break up events into separate pieces for genotyping - - final int numHaplotypes = haplotypes.size(); - final Set sampleKeySet = haplotypes.get(0).getSampleKeySet(); // BUGBUG: assume all haplotypes saw the same samples - final ArrayList bestHaplotypesIndexList = new ArrayList(); - bestHaplotypesIndexList.add(0); // always start with the reference haplotype - final double[][][] haplotypeLikelihoodMatrix = new double[sampleKeySet.size()][numHaplotypes][numHaplotypes]; - - int sampleCount = 0; - for( final String sample : sampleKeySet ) { - haplotypeLikelihoodMatrix[sampleCount++] = computeDiploidHaplotypeLikelihoods( haplotypes, sample ); - } - - int hap1 = 0; - int hap2 = 0; - int chosenSample = 0; - //double bestElement = Double.NEGATIVE_INFINITY; - final int maxChosenHaplotypes = Math.min( 15, sampleKeySet.size() * 2 + 1 ); - while( bestHaplotypesIndexList.size() < maxChosenHaplotypes ) { - double maxElement = Double.NEGATIVE_INFINITY; - for( int kkk = 0; kkk < sampleCount; kkk++ ) { - for( int iii = 0; iii < numHaplotypes; iii++ ) { - for( int jjj = 0; jjj <= iii; jjj++ ) { - if( haplotypeLikelihoodMatrix[kkk][iii][jjj] > maxElement ) { - maxElement = haplotypeLikelihoodMatrix[kkk][iii][jjj]; - hap1 = iii; - hap2 = jjj; - chosenSample = kkk; - } - } - } - } - if( maxElement == Double.NEGATIVE_INFINITY ) { break; } - - if( !bestHaplotypesIndexList.contains(hap1) ) { bestHaplotypesIndexList.add(hap1); } - if( !bestHaplotypesIndexList.contains(hap2) ) { bestHaplotypesIndexList.add(hap2); } - - for( int iii = 0; iii < numHaplotypes; iii++ ) { - for( int jjj = 0; jjj <= iii; jjj++ ) { - haplotypeLikelihoodMatrix[chosenSample][iii][jjj] = Double.NEGATIVE_INFINITY; - } - } - } - - if( DEBUG ) { System.out.println("Chose " + (bestHaplotypesIndexList.size() - 1) + " alternate haplotypes to genotype in all samples."); } - - final ArrayList bestHaplotypes = new ArrayList(); - for( final int hIndex : bestHaplotypesIndexList ) { - bestHaplotypes.add( haplotypes.get(hIndex) ); - } - return bestHaplotypes; - } - */ - - @Requires({"haplotypes.size() > 0"}) - @Ensures({"result.size() <= haplotypes.size()"}) - public ArrayList selectBestHaplotypes( final ArrayList haplotypes, final Map stratifiedReadMap, final int maxNumHaplotypesInPopulation ) { + public List selectBestHaplotypes( final List haplotypes, final Map stratifiedReadMap, final int maxNumHaplotypesInPopulation ) { final int numHaplotypes = haplotypes.size(); final Set sampleKeySet = stratifiedReadMap.keySet(); - final ArrayList bestHaplotypesIndexList = new ArrayList(); + final List bestHaplotypesIndexList = new ArrayList(); bestHaplotypesIndexList.add( findReferenceIndex(haplotypes) ); // always start with the reference haplotype final List haplotypesAsAlleles = new ArrayList(); for( final Haplotype h : haplotypes ) { haplotypesAsAlleles.add(Allele.create(h.getBases())); } @@ -332,7 +273,7 @@ public class LikelihoodCalculationEngine { if( DEBUG ) { System.out.println("Chose " + (bestHaplotypesIndexList.size() - 1) + " alternate haplotypes to genotype in all samples."); } - final ArrayList bestHaplotypes = new ArrayList(); + final List bestHaplotypes = new ArrayList(); for( final int hIndex : bestHaplotypesIndexList ) { bestHaplotypes.add( haplotypes.get(hIndex) ); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java index b0e340dc2..3efa342b1 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java @@ -51,7 +51,7 @@ import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.variant.variantcontext.VariantContext; -import java.util.ArrayList; +import java.util.List; /** * Created by IntelliJ IDEA. @@ -67,5 +67,5 @@ public abstract class LocalAssemblyEngine { protected LocalAssemblyEngine() { } - public abstract ArrayList runLocalAssembly(ActiveRegion activeRegion, Haplotype refHaplotype, byte[] fullReferenceWithPadding, GenomeLoc refLoc, int PRUNE_FACTOR, ArrayList activeAllelesToGenotype); + public abstract List runLocalAssembly(ActiveRegion activeRegion, Haplotype refHaplotype, byte[] fullReferenceWithPadding, GenomeLoc refLoc, int PRUNE_FACTOR, List activeAllelesToGenotype); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java index e16994fa4..a9768557d 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java @@ -84,7 +84,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { private final boolean DEBUG; private final PrintStream GRAPH_WRITER; - private final ArrayList> graphs = new ArrayList>(); + private final List> graphs = new ArrayList>(); private final int MIN_KMER; private int PRUNE_FACTOR = 2; @@ -96,7 +96,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { MIN_KMER = minKmer; } - public ArrayList runLocalAssembly( final ActiveRegion activeRegion, final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final int PRUNE_FACTOR, final ArrayList activeAllelesToGenotype ) { + public List runLocalAssembly( final ActiveRegion activeRegion, final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final int PRUNE_FACTOR, final List activeAllelesToGenotype ) { this.PRUNE_FACTOR = PRUNE_FACTOR; // create the graphs @@ -168,7 +168,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { } protected static void pruneGraph( final DefaultDirectedGraph graph, final int pruneFactor ) { - final ArrayList edgesToRemove = new ArrayList(); + final List edgesToRemove = new ArrayList(); for( final DeBruijnEdge e : graph.edgeSet() ) { if( e.getMultiplicity() <= pruneFactor && !e.getIsRef() ) { // remove non-reference edges with weight less than or equal to the pruning factor edgesToRemove.add(e); @@ -177,7 +177,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { graph.removeAllEdges(edgesToRemove); // Run through the graph and clean up singular orphaned nodes - final ArrayList verticesToRemove = new ArrayList(); + final List verticesToRemove = new ArrayList(); for( final DeBruijnVertex v : graph.vertexSet() ) { if( graph.inDegreeOf(v) == 0 && graph.outDegreeOf(v) == 0 ) { verticesToRemove.add(v); @@ -187,7 +187,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { } protected static void eliminateNonRefPaths( final DefaultDirectedGraph graph ) { - final ArrayList verticesToRemove = new ArrayList(); + final List verticesToRemove = new ArrayList(); boolean done = false; while( !done ) { done = true; @@ -313,8 +313,8 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { } @Ensures({"result.contains(refHaplotype)"}) - private ArrayList findBestPaths( final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final ArrayList activeAllelesToGenotype, final GenomeLoc activeRegionWindow ) { - final ArrayList returnHaplotypes = new ArrayList(); + private List findBestPaths( final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final List activeAllelesToGenotype, final GenomeLoc activeRegionWindow ) { + final List returnHaplotypes = new ArrayList(); // add the reference haplotype separately from all the others final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( fullReferenceWithPadding, refHaplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND ); @@ -343,7 +343,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { // for GGA mode, add the desired allele into the haplotype if it isn't already present if( !activeAllelesToGenotype.isEmpty() ) { - final HashMap eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly" ); // BUGBUG: need to put this function in a shared place + final Map eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly" ); // BUGBUG: need to put this function in a shared place for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart()); @@ -378,7 +378,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { return returnHaplotypes; } - private boolean addHaplotype( final Haplotype haplotype, final byte[] ref, final ArrayList haplotypeList, final int activeRegionStart, final int activeRegionStop, final boolean FORCE_INCLUSION_FOR_GGA_MODE ) { + private boolean addHaplotype( final Haplotype haplotype, final byte[] ref, final List haplotypeList, final int activeRegionStart, final int activeRegionStop, final boolean FORCE_INCLUSION_FOR_GGA_MODE ) { if( haplotype == null ) { return false; } final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND ); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 851703648..ad554a130 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -75,7 +75,7 @@ import org.broadinstitute.sting.utils.sam.NWaySAMFileWriter; import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.text.TextFormattingUtils; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.variant.variantcontext.VariantContext; import java.io.File; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/BaseArray.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/BaseArray.java index de91765b7..cbc6a1f94 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/BaseArray.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/BaseArray.java @@ -46,7 +46,7 @@ package org.broadinstitute.sting.gatk.walkers.phasing; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import java.util.Arrays; import java.util.LinkedList; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingRead.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingRead.java index f2ba027f8..a04789f61 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingRead.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingRead.java @@ -46,7 +46,7 @@ package org.broadinstitute.sting.gatk.walkers.phasing; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java index 56fc59c7f..fe38461c5 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java @@ -59,7 +59,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.HasGenomeLocation; import org.broadinstitute.sting.utils.SampleUtils; @@ -320,7 +320,7 @@ public class ReadBackedPhasing extends RodWalker that maps from rgKey x errorModel -> double, - // where the double is the result of this calculation. The entire calculation can - // be done upfront, on initialization of this BaseRecalibration structure - // - final NestedIntegerArray byReadGroupTable = recalibrationTables.getReadGroupTable(); - globalDeltaQs = new NestedIntegerArray( byReadGroupTable.getDimensions() ); - logger.info("Calculating global delta Q table..."); - for ( NestedIntegerArray.Leaf leaf : byReadGroupTable.getAllLeaves() ) { - final int rgKey = leaf.keys[0]; - final int eventIndex = leaf.keys[1]; - final double globalDeltaQ = calculateGlobalDeltaQ(rgKey, EventType.eventFrom(eventIndex)); - globalDeltaQs.put(globalDeltaQ, rgKey, eventIndex); - } - - - // The calculation of the deltaQ report is constant. key[0] and key[1] are the read group and qual, respectively - // and globalDeltaQ is a constant for the read group. So technically the delta Q reported is simply a lookup - // into a matrix indexed by rgGroup, qual, and event type. - // the code below actually creates this cache with a NestedIntegerArray calling into the actual - // calculateDeltaQReported code. - final NestedIntegerArray byQualTable = recalibrationTables.getQualityScoreTable(); - deltaQReporteds = new NestedIntegerArray( byQualTable.getDimensions() ); - logger.info("Calculating delta Q reported table..."); - for ( NestedIntegerArray.Leaf leaf : byQualTable.getAllLeaves() ) { - final int rgKey = leaf.keys[0]; - final int qual = leaf.keys[1]; - final int eventIndex = leaf.keys[2]; - final EventType event = EventType.eventFrom(eventIndex); - final double globalDeltaQ = getGlobalDeltaQ(rgKey, event); - final double deltaQReported = calculateDeltaQReported(rgKey, qual, event, globalDeltaQ, (byte)qual); - deltaQReporteds.put(deltaQReported, rgKey, qual, eventIndex); - } - - logger.info("done calculating cache"); } /** @@ -189,29 +150,37 @@ public class BaseRecalibration { // the rg key is constant over the whole read, the global deltaQ is too final int rgKey = fullReadKeySet[0][0]; + final RecalDatum empiricalQualRG = recalibrationTables.getReadGroupTable().get(rgKey, errorModel.ordinal()); - final double globalDeltaQ = getGlobalDeltaQ(rgKey, errorModel); + if( empiricalQualRG != null ) { + final double epsilon = ( globalQScorePrior > 0.0 && errorModel.equals(EventType.BASE_SUBSTITUTION) ? globalQScorePrior : empiricalQualRG.getEstimatedQReported() ); - for (int offset = 0; offset < readLength; offset++) { // recalibrate all bases in the read - final byte origQual = quals[offset]; + for (int offset = 0; offset < readLength; offset++) { // recalibrate all bases in the read + final byte origQual = quals[offset]; - // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality) - if ( origQual >= preserveQLessThan ) { - // get the keyset for this base using the error model - final int[] keySet = fullReadKeySet[offset]; - final double deltaQReported = getDeltaQReported(keySet[0], keySet[1], errorModel, globalDeltaQ); - final double deltaQCovariates = calculateDeltaQCovariates(recalibrationTables, keySet, errorModel, globalDeltaQ, deltaQReported, origQual); + // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality) + if ( origQual >= preserveQLessThan ) { + // get the keyset for this base using the error model + final int[] keySet = fullReadKeySet[offset]; + final RecalDatum empiricalQualQS = recalibrationTables.getQualityScoreTable().get(keySet[0], keySet[1], errorModel.ordinal()); + final List empiricalQualCovs = new ArrayList(); + for (int i = 2; i < requestedCovariates.length; i++) { + if (keySet[i] < 0) { + continue; + } + empiricalQualCovs.add(recalibrationTables.getTable(i).get(keySet[0], keySet[1], keySet[i], errorModel.ordinal())); + } - // calculate the recalibrated qual using the BQSR formula - double recalibratedQualDouble = origQual + globalDeltaQ + deltaQReported + deltaQCovariates; + double recalibratedQualDouble = hierarchicalBayesianQualityEstimate( epsilon, empiricalQualRG, empiricalQualQS, empiricalQualCovs ); - // recalibrated quality is bound between 1 and MAX_QUAL - final byte recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQualDouble), QualityUtils.MAX_RECALIBRATED_Q_SCORE); + // recalibrated quality is bound between 1 and MAX_QUAL + final byte recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQualDouble), QualityUtils.MAX_RECALIBRATED_Q_SCORE); - // return the quantized version of the recalibrated quality - final byte recalibratedQualityScore = quantizationInfo.getQuantizedQuals().get(recalibratedQual); + // return the quantized version of the recalibrated quality + final byte recalibratedQualityScore = quantizationInfo.getQuantizedQuals().get(recalibratedQual); - quals[offset] = recalibratedQualityScore; + quals[offset] = recalibratedQualityScore; + } } } @@ -220,102 +189,15 @@ public class BaseRecalibration { } } - private double getGlobalDeltaQ(final int rgKey, final EventType errorModel) { - final Double cached = globalDeltaQs.get(rgKey, errorModel.ordinal()); - - if ( TEST_CACHING ) { - final double calcd = calculateGlobalDeltaQ(rgKey, errorModel); - if ( calcd != cached ) - throw new IllegalStateException("calculated " + calcd + " and cached " + cached + " global delta q not equal at " + rgKey + " / " + errorModel); + @Ensures("result > 0.0") + protected static double hierarchicalBayesianQualityEstimate( final double epsilon, final RecalDatum empiricalQualRG, final RecalDatum empiricalQualQS, final List empiricalQualCovs ) { + final double globalDeltaQ = ( empiricalQualRG == null ? 0.0 : empiricalQualRG.getEmpiricalQuality(epsilon) - epsilon ); + final double deltaQReported = ( empiricalQualQS == null ? 0.0 : empiricalQualQS.getEmpiricalQuality(globalDeltaQ + epsilon) - (globalDeltaQ + epsilon) ); + double deltaQCovariates = 0.0; + for( final RecalDatum empiricalQualCov : empiricalQualCovs ) { + deltaQCovariates += ( empiricalQualCov == null ? 0.0 : empiricalQualCov.getEmpiricalQuality(deltaQReported + globalDeltaQ + epsilon) - (deltaQReported + globalDeltaQ + epsilon) ); } - return cachedWithDefault(cached); - } - - private double getDeltaQReported(final int rgKey, final int qualKey, final EventType errorModel, final double globalDeltaQ) { - final Double cached = deltaQReporteds.get(rgKey, qualKey, errorModel.ordinal()); - - if ( TEST_CACHING ) { - final double calcd = calculateDeltaQReported(rgKey, qualKey, errorModel, globalDeltaQ, (byte)qualKey); - if ( calcd != cached ) - throw new IllegalStateException("calculated " + calcd + " and cached " + cached + " global delta q not equal at " + rgKey + " / " + qualKey + " / " + errorModel); - } - - return cachedWithDefault(cached); - } - - /** - * @param d a Double (that may be null) that is the result of a delta Q calculation - * @return a double == d if d != null, or 0.0 if it is - */ - private double cachedWithDefault(final Double d) { - return d == null ? 0.0 : d; - } - - /** - * Note that this calculation is a constant for each rgKey and errorModel. We need only - * compute this value once for all data. - * - * @param rgKey read group key - * @param errorModel the event type - * @return global delta Q - */ - private double calculateGlobalDeltaQ(final int rgKey, final EventType errorModel) { - double result = 0.0; - - final RecalDatum empiricalQualRG = recalibrationTables.getReadGroupTable().get(rgKey, errorModel.ordinal()); - - if (empiricalQualRG != null) { - final double aggregrateQReported = ( globalQScorePrior > 0.0 && errorModel.equals(EventType.BASE_SUBSTITUTION) ? globalQScorePrior : empiricalQualRG.getEstimatedQReported() ); - final double globalDeltaQEmpirical = empiricalQualRG.getEmpiricalQuality(aggregrateQReported); - result = globalDeltaQEmpirical - aggregrateQReported; - } - - return result; - } - - private double calculateDeltaQReported(final int rgKey, final int qualKey, final EventType errorModel, final double globalDeltaQ, final byte qualFromRead) { - double result = 0.0; - - final RecalDatum empiricalQualQS = recalibrationTables.getQualityScoreTable().get(rgKey, qualKey, errorModel.ordinal()); - if (empiricalQualQS != null) { - final double deltaQReportedEmpirical = empiricalQualQS.getEmpiricalQuality(globalDeltaQ + qualFromRead); - result = deltaQReportedEmpirical - (globalDeltaQ + qualFromRead); - } - - return result; - } - - private double calculateDeltaQCovariates(final RecalibrationTables recalibrationTables, final int[] key, final EventType errorModel, final double globalDeltaQ, final double deltaQReported, final byte qualFromRead) { - double result = 0.0; - - // for all optional covariates - for (int i = 2; i < requestedCovariates.length; i++) { - if (key[i] < 0) - continue; - - result += calculateDeltaQCovariate(recalibrationTables.getTable(i), - key[0], key[1], key[i], errorModel, - globalDeltaQ, deltaQReported, qualFromRead); - } - - return result; - } - - private double calculateDeltaQCovariate(final NestedIntegerArray table, - final int rgKey, - final int qualKey, - final int tableKey, - final EventType errorModel, - final double globalDeltaQ, - final double deltaQReported, - final byte qualFromRead) { - final RecalDatum empiricalQualCO = table.get(rgKey, qualKey, tableKey, errorModel.ordinal()); - if (empiricalQualCO != null) { - final double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality(deltaQReported + globalDeltaQ + qualFromRead); - return deltaQCovariateEmpirical - (deltaQReported + globalDeltaQ + qualFromRead); - } else { - return 0.0; - } + return epsilon + globalDeltaQ + deltaQReported + deltaQCovariates; } } diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java index 0b4441439..bd0063a7d 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java @@ -181,6 +181,7 @@ public class RecalDatum { if ( Double.isNaN(estimatedQReported) ) throw new IllegalArgumentException("estimatedQReported is NaN"); this.estimatedQReported = estimatedQReported; + empiricalQuality = UNINITIALIZED; } public final double getEstimatedQReported() { diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java index 699f26c5e..f7c3440e4 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java @@ -52,7 +52,7 @@ import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.utils.recalibration.covariates.*; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.R.RScriptExecutor; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.classloader.PluginManager; diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java index 2f9a38972..f10c26ddc 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java @@ -131,7 +131,7 @@ public class RecalibrationReport { * Combines two recalibration reports by adding all observations and errors * * Note: This method DOES NOT recalculate the empirical qualities and quantized qualities. You have to recalculate - * them after combining. The reason for not calculating it is because this function is inteded for combining a + * them after combining. The reason for not calculating it is because this function is intended for combining a * series of recalibration reports, and it only makes sense to calculate the empirical qualities and quantized * qualities after all the recalibration reports have been combined. Having the user recalculate when appropriate, * makes this method faster diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java index 66c74d667..4fc9470f4 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java @@ -48,7 +48,7 @@ package org.broadinstitute.sting.utils.recalibration.covariates; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.clipping.ClippingRepresentation; import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java index 0578ac60c..bccaea827 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java @@ -48,7 +48,7 @@ package org.broadinstitute.sting.utils.recalibration.covariates; import org.broadinstitute.sting.utils.recalibration.ReadCovariates; import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.NGSPlatform; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatCovariate.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatCovariate.java index b6b954c4a..0eeb18251 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatCovariate.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatCovariate.java @@ -52,7 +52,7 @@ import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollectio import org.broadinstitute.sting.utils.recalibration.ReadCovariates; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.collections.Pair; import java.util.Arrays; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java index 2815599d9..f82f24439 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java @@ -125,19 +125,19 @@ public class BQSRGathererUnitTest extends BaseTest { testTablesWithColumns(originalTable, calculatedTable, columnsToTest); // test the RecalTable0 table - columnsToTest = Arrays.asList(RecalUtils.READGROUP_COLUMN_NAME, RecalUtils.EVENT_TYPE_COLUMN_NAME, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME, RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, RecalUtils.NUMBER_ERRORS_COLUMN_NAME); + columnsToTest = Arrays.asList(RecalUtils.READGROUP_COLUMN_NAME, RecalUtils.EVENT_TYPE_COLUMN_NAME, RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, RecalUtils.NUMBER_ERRORS_COLUMN_NAME); originalTable = originalReport.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE); calculatedTable = calculatedReport.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE); testTablesWithColumns(originalTable, calculatedTable, columnsToTest); // test the RecalTable1 table - columnsToTest = Arrays.asList(RecalUtils.READGROUP_COLUMN_NAME, RecalUtils.QUALITY_SCORE_COLUMN_NAME, RecalUtils.EVENT_TYPE_COLUMN_NAME, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, RecalUtils.NUMBER_ERRORS_COLUMN_NAME); + columnsToTest = Arrays.asList(RecalUtils.READGROUP_COLUMN_NAME, RecalUtils.QUALITY_SCORE_COLUMN_NAME, RecalUtils.EVENT_TYPE_COLUMN_NAME, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, RecalUtils.NUMBER_ERRORS_COLUMN_NAME); originalTable = originalReport.getTable(RecalUtils.QUALITY_SCORE_REPORT_TABLE_TITLE); calculatedTable = calculatedReport.getTable(RecalUtils.QUALITY_SCORE_REPORT_TABLE_TITLE); testTablesWithColumns(originalTable, calculatedTable, columnsToTest); // test the RecalTable2 table - columnsToTest = Arrays.asList(RecalUtils.READGROUP_COLUMN_NAME, RecalUtils.QUALITY_SCORE_COLUMN_NAME, RecalUtils.COVARIATE_VALUE_COLUMN_NAME, RecalUtils.COVARIATE_NAME_COLUMN_NAME, RecalUtils.EVENT_TYPE_COLUMN_NAME, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, RecalUtils.NUMBER_ERRORS_COLUMN_NAME); + columnsToTest = Arrays.asList(RecalUtils.READGROUP_COLUMN_NAME, RecalUtils.QUALITY_SCORE_COLUMN_NAME, RecalUtils.COVARIATE_VALUE_COLUMN_NAME, RecalUtils.COVARIATE_NAME_COLUMN_NAME, RecalUtils.EVENT_TYPE_COLUMN_NAME, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, RecalUtils.NUMBER_ERRORS_COLUMN_NAME); originalTable = originalReport.getTable(RecalUtils.ALL_COVARIATES_REPORT_TABLE_TITLE); calculatedTable = calculatedReport.getTable(RecalUtils.ALL_COVARIATES_REPORT_TABLE_TITLE); testTablesWithColumns(originalTable, calculatedTable, columnsToTest); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java index 254bfd92c..1f4875298 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java @@ -188,12 +188,12 @@ public class BQSRIntegrationTest extends WalkerTest { public Object[][] createPRTestData() { List tests = new ArrayList(); - tests.add(new Object[]{1, new PRTest(" -qq -1", "64622d37e234e57cf472068a00b4ec89")}); - tests.add(new Object[]{1, new PRTest(" -qq 6", "735270581d1ee836f3f99d68ecbd5f24")}); - tests.add(new Object[]{1, new PRTest(" -DIQ", "1e3b0a11f246a238ac4c06b084142715")}); + tests.add(new Object[]{1, new PRTest(" -qq -1", "b8d296fb78adc5cff7ce12073a69d985")}); + tests.add(new Object[]{1, new PRTest(" -qq 6", "2ee0cedf84c1a33a807438172bc36c11")}); + tests.add(new Object[]{1, new PRTest(" -DIQ", "46cf74900bf8b13438e6a195b3085c48")}); for ( final int nct : Arrays.asList(1, 2, 4) ) { - tests.add(new Object[]{nct, new PRTest("", "39ecde2c2ba4a7c109e65372618a419a")}); + tests.add(new Object[]{nct, new PRTest("", "88b88f006ebb1aade85089bfba5a9e8d")}); } return tests.toArray(new Object[][]{}); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java index 047d69c5f..b5c8b3f99 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java @@ -52,7 +52,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.locusiterator.LocusIteratorByState; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.QualityUtils; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsUnitTest.java index 14dedebc4..cc5576500 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsUnitTest.java @@ -50,7 +50,7 @@ import net.sf.samtools.SAMUtils; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index d446da830..ad682734c 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -68,12 +68,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "664a14590d0966e63d3aabff2d7bab0a"); + HCTest(CEUTRIO_BAM, "", "72ce6a5e46644dfd73aeffba9d6131ea"); } @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "111f3dc086a3cea1be9bd1ad6e1d18ed"); + HCTest(NA12878_BAM, "", "f9d696391f1f337092d70e3abcd32bfb"); } @Test(enabled = false) @@ -84,7 +84,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "c70f753f7918a1c670ce4ed5c66de09e"); + "4e8beb2cdc3d77427f14acf37cea2bd0"); } private void HCTestComplexGGA(String bam, String args, String md5) { @@ -96,13 +96,13 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "b1d3070f0c49becf34101e480ab6c589"); + "75e1df0dcf3728fd2b6e4735c4cc88ce"); } @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "20eba2e54266f6aebf35b7b7b7e754e3"); + "1d244f2adbc72a0062eb673d56cbb5a8"); } private void HCTestComplexVariants(String bam, String args, String md5) { @@ -113,7 +113,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleComplex() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "f9805488d85e59e1ae002d0d32d7011d"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "a1bc844f62a9cb60dbb70d00ad36b85d"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { @@ -124,7 +124,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleSymbolic() { - HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "4544a2916f46f58b32db8008776b91a3"); + HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "23956e572f19ff26d25bbdfaa307675b"); } private void HCTestIndelQualityScores(String bam, String args, String md5) { @@ -135,7 +135,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleIndelQualityScores() { - HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "f3984a91e7562494c2a7e41fd05a6734"); + HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "1255f466aa2d288f015cd55d8fece1ac"); } // That problem bam came from a user on the forum and it spotted a problem where the ReadClipper @@ -146,14 +146,14 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestProblematicReadsModifiedInActiveRegions() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("3e9e025c539be6c5e0d0f2e5d8e86a62")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("103c91c4a78164949e166d3d27eb459b")); executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec); } @Test public void HCTestStructuralIndels() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("34129e6c6310ef4eeeeb59b0e7ac0464")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("87fe31a4bbd68a9eb5d5910db5011c82")); executeTest("HCTestStructuralIndels: ", spec); } @@ -175,7 +175,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("5f4c07aaf1d2d34cccce43196a5fbd71")); + Arrays.asList("0fa19ec5cf737a3445544b59ecc995e9")); executeTest("HC calling on a ReducedRead BAM", spec); } @@ -183,7 +183,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void testReducedBamWithReadsNotFullySpanningDeletion() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1, - Arrays.asList("6ead001b1f8e7cb433fd335f78fde5f0")); + Arrays.asList("5f4cbdcc9bffee6bba258dfac89492ed")); executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java index 9c0567464..2e31f6725 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java @@ -51,7 +51,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.variant.variantcontext.Allele; import org.broadinstitute.variant.variantcontext.Genotype; import org.broadinstitute.variant.variantcontext.GenotypeBuilder; diff --git a/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java index 8f4876c6f..87e208af4 100644 --- a/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java @@ -50,7 +50,7 @@ package org.broadinstitute.sting.utils.pairhmm; // the imports for unit testing. import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.Utils; import org.testng.Assert; import org.testng.annotations.DataProvider; diff --git a/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java index 09f751fbc..da78932d1 100644 --- a/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java @@ -50,6 +50,7 @@ package org.broadinstitute.sting.utils.recalibration; // the imports for unit testing. +import org.apache.commons.lang.ArrayUtils; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; @@ -57,7 +58,9 @@ import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; public class RecalDatumUnitTest extends BaseTest { @@ -277,4 +280,31 @@ public class RecalDatumUnitTest extends BaseTest { Assert.assertFalse(Double.isInfinite(log10likelihood)); Assert.assertFalse(Double.isNaN(log10likelihood)); } + + @Test + public void basicHierarchicalBayesianQualityEstimateTest() { + + for( double epsilon = 15.0; epsilon <= 60.0; epsilon += 2.0 ) { + double RG_Q = 45.0; + RecalDatum RG = new RecalDatum( (long)100000000, (long) (100000000 * 1.0 / (Math.pow(10.0, RG_Q/10.0))), (byte)RG_Q); + double Q = 30.0; + RecalDatum QS = new RecalDatum( (long)100000000, (long) (100000000 * 1.0 / (Math.pow(10.0, Q/10.0))), (byte)Q); + RecalDatum COV = new RecalDatum( (long)15, (long) 1, (byte)45.0); // no data here so Bayesian prior has a huge effect on the empirical quality + + // initial epsilon condition shouldn't matter when there are a lot of observations + Assert.assertEquals(BaseRecalibration.hierarchicalBayesianQualityEstimate( epsilon, RG, QS, Collections.singletonList(COV)), Q, 1E-4 ); + } + + for( double epsilon = 15.0; epsilon <= 60.0; epsilon += 2.0 ) { + double RG_Q = 45.0; + RecalDatum RG = new RecalDatum( (long)10, (long) (10 * 1.0 / (Math.pow(10.0, RG_Q/10.0))), (byte)RG_Q); + double Q = 30.0; + RecalDatum QS = new RecalDatum( (long)10, (long) (10 * 1.0 / (Math.pow(10.0, Q/10.0))), (byte)Q); + RecalDatum COV = new RecalDatum( (long)15, (long) 1, (byte)45.0); // no data here so Bayesian prior has a huge effect on the empirical quality + + // initial epsilon condition dominates when there is no data + Assert.assertEquals(BaseRecalibration.hierarchicalBayesianQualityEstimate( epsilon, RG, QS, Collections.singletonList(COV)), epsilon, 1E-4 ); + } + + } } \ No newline at end of file diff --git a/protected/java/test/org/broadinstitute/sting/utils/recalibration/RepeatCovariatesUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/RepeatCovariatesUnitTest.java index c0cb2d7b6..6d13fe96c 100644 --- a/protected/java/test/org/broadinstitute/sting/utils/recalibration/RepeatCovariatesUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/utils/recalibration/RepeatCovariatesUnitTest.java @@ -52,7 +52,7 @@ import org.broadinstitute.sting.utils.recalibration.covariates.*; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.collections.Pair; import org.testng.Assert; import org.testng.annotations.BeforeClass; diff --git a/public/java/src/org/broadinstitute/sting/alignment/Alignment.java b/public/java/src/org/broadinstitute/sting/alignment/Alignment.java index f979df39c..a4e04ab5d 100644 --- a/public/java/src/org/broadinstitute/sting/alignment/Alignment.java +++ b/public/java/src/org/broadinstitute/sting/alignment/Alignment.java @@ -26,7 +26,7 @@ package org.broadinstitute.sting.alignment; import net.sf.samtools.*; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; diff --git a/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidation.java b/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidation.java index 2a560fb15..32126f0e3 100644 --- a/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidation.java +++ b/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidation.java @@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java index f467c403a..169ad7aa5 100644 --- a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java @@ -29,7 +29,7 @@ import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.samtools.*; import org.broadinstitute.sting.alignment.Aligner; import org.broadinstitute.sting.alignment.Alignment; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.File; diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java index 5fc81cb9f..f60218fe9 100644 --- a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java @@ -31,7 +31,7 @@ import org.broadinstitute.sting.alignment.Alignment; import org.broadinstitute.sting.alignment.bwa.BWAAligner; import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; import org.broadinstitute.sting.alignment.reference.bwt.*; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.Utils; import java.io.File; diff --git a/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java b/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java index 4f767f90b..90d6dda48 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java @@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.contexts; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java index 11f03ce36..6bfa56828 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java @@ -29,7 +29,7 @@ import net.sf.samtools.SAMReadGroupRecord; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.pileup.*; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.variant.variantcontext.Allele; import java.io.PrintStream; diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java index c2a064dbe..839ebcfa1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java @@ -31,8 +31,11 @@ import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.crypt.CryptUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.io.IOUtils; +import org.broadinstitute.sting.utils.io.Resource; import org.broadinstitute.sting.utils.threading.ThreadEfficiencyMonitor; import org.jets3t.service.S3Service; import org.jets3t.service.S3ServiceException; @@ -48,6 +51,7 @@ import org.simpleframework.xml.stream.HyphenStyle; import java.io.*; import java.security.NoSuchAlgorithmException; +import java.security.PublicKey; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.ArrayList; @@ -309,6 +313,51 @@ public class GATKRunReport { } } + /** + * Decrypts encrypted AWS key from encryptedKeySource + * @param encryptedKeySource a file containing an encrypted AWS key + * @return a decrypted AWS key as a String + */ + public static String decryptAWSKey(final File encryptedKeySource) throws FileNotFoundException { + return decryptAWSKey(new FileInputStream(encryptedKeySource)); + } + + /** + * @see #decryptAWSKey(java.io.File) but with input from an inputstream + */ + private static String decryptAWSKey(final InputStream encryptedKeySource) { + final PublicKey key = CryptUtils.loadGATKDistributedPublicKey(); + final byte[] fromDisk = IOUtils.readStreamIntoByteArray(encryptedKeySource); + final byte[] decrypted = CryptUtils.decryptData(fromDisk, key); + return new String(decrypted); + } + + /** + * Get the decrypted AWS key sorted in the resource directories of name + * @param name the name of the file containing the needed AWS key + * @return a non-null GATK + */ + private static String getAWSKey(final String name) { + final Resource resource = new Resource(name, GATKRunReport.class); + return decryptAWSKey(resource.getResourceContentsAsStream()); + } + + /** + * Get the AWS access key for the GATK user + * @return a non-null AWS access key for the GATK user + */ + protected static String getAWSAccessKey() { + return getAWSKey("GATK_AWS_access.key"); + } + + /** + * Get the AWS secret key for the GATK user + * @return a non-null AWS secret key for the GATK user + */ + protected static String getAWSSecretKey() { + return getAWSKey("GATK_AWS_secret.key"); + } + private class S3PutRunnable implements Runnable { public AtomicBoolean isSuccess; @@ -331,17 +380,17 @@ public class GATKRunReport { // are stored in an AWSCredentials object: // IAM GATK user credentials -- only right is to PutObject into GATK_Run_Report bucket - String awsAccessKey = "AKIAJXU7VIHBPDW4TDSQ"; // GATK AWS user - String awsSecretKey = "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"; // GATK AWS user - AWSCredentials awsCredentials = new AWSCredentials(awsAccessKey, awsSecretKey); + final String awsAccessKey = getAWSAccessKey(); // GATK AWS user + final String awsSecretKey = getAWSSecretKey(); // GATK AWS user + final AWSCredentials awsCredentials = new AWSCredentials(awsAccessKey, awsSecretKey); // To communicate with S3, create a class that implements an S3Service. We will use the REST/HTTP // implementation based on HttpClient, as this is the most robust implementation provided with JetS3t. - S3Service s3Service = new RestS3Service(awsCredentials); + final S3Service s3Service = new RestS3Service(awsCredentials); // Create an S3Object based on a file, with Content-Length set automatically and // Content-Type set based on the file's extension (using the Mimetypes utility class) - S3Object fileObject = new S3Object(key, report); + final S3Object fileObject = new S3Object(key, report); //logger.info("Created S3Object" + fileObject); //logger.info("Uploading " + localFile + " to AWS bucket"); s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject); diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATK_AWS_access.key b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATK_AWS_access.key new file mode 100644 index 000000000..45242c3cd --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATK_AWS_access.key @@ -0,0 +1,2 @@ +BÜw“ìX~¨[užûG¢£›ÔøÅ¦e‰÷á‡,툉)s­/sg1µáÙL„±ƒQ¹šaG%$·RÝŠìŸ{xÿqPz׊Ž”J®¬}”á\¸–´”{(äâÑÍBœK’&ܶ™ŽõÇ,’`¥³÷”@`oX +%/`mâ֑Ȥ3Àhø3ÜàrQº v1šaªW•š?ˆ‘uH¼ŒÖÕÇÛØæsðÅâŽaÌz¬Þ¿­ÅMÆË¦?îUzh÷ì·k#+ŒxÄ„Müík“ ¦©Xž²)ºý°MıšÕ• Çipò-î?jêŠúHµ@d+HR|šbò³F \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATK_AWS_secret.key b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATK_AWS_secret.key new file mode 100644 index 000000000..b86a25036 Binary files /dev/null and b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATK_AWS_secret.key differ diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java index 704422308..da0634dd2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java @@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.variant.vcf.VCFHeaderLineType; import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; import org.broadinstitute.variant.variantcontext.VariantContext; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java index 8cabb7c82..4a8c95dd1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java @@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.variant.vcf.VCFHeaderLineType; import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index e30d43185..b03b5327f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -36,7 +36,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.variant.vcf.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java index 566aac6b5..79ff97333 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java @@ -39,7 +39,7 @@ import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import java.io.File; import java.io.FileNotFoundException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java index fe2eee2a2..f0d6f7301 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java @@ -28,7 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.coverage; import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fragments.FragmentCollection; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java index dbb8ed5a6..d9e5e3e98 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java @@ -41,7 +41,7 @@ import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageStats.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageStats.java index 56aa6f2d7..d013825eb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageStats.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageStats.java @@ -25,7 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.coverage; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.exceptions.UserException; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByInterval.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByInterval.java index 668d3fd5f..84520b24b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByInterval.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByInterval.java @@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java index 8a7f2bcc3..13dc238cd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java @@ -34,7 +34,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.pileup.PileupElement; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStats.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStats.java index ad7d85031..ee2530e8b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStats.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStats.java @@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RefWalker; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.io.PrintStream; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/QCRef.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/QCRef.java index b4d8dd564..84a28b05a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/QCRef.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/QCRef.java @@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RefWalker; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/readutils/ClipReads.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/readutils/ClipReads.java index fe2b75464..91059edfd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/readutils/ClipReads.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/readutils/ClipReads.java @@ -40,7 +40,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.Requires; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.clipping.ClippingOp; import org.broadinstitute.sting.utils.clipping.ClippingRepresentation; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java index 1a9ce5ad5..78c55d1c4 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java @@ -41,7 +41,7 @@ import org.broadinstitute.sting.utils.codecs.table.TableFeature; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java index e9fc8114b..2a7e29583 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java @@ -32,9 +32,9 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.variantcontext.Allele; import org.broadinstitute.variant.variantcontext.VariantContext; -import org.broadinstitute.variant.variantcontext.VariantContextUtils; @Analysis(description = "Evaluation summary for multi-allelic variants") public class MultiallelicSummary extends VariantEvaluator implements StandardEval { @@ -122,7 +122,7 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva private void calculatePairwiseTiTv(VariantContext vc) { for ( Allele alt : vc.getAlternateAlleles() ) { - if ( VariantContextUtils.isTransition(vc.getReference(), alt) ) + if ( GATKVariantContextUtils.isTransition(vc.getReference(), alt) ) nTi++; else nTv++; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java index 4ad7be045..5618f607f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java @@ -30,9 +30,9 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.variantcontext.VariantContext; -import org.broadinstitute.variant.variantcontext.VariantContextUtils; @Analysis(description = "Ti/Tv Variant Evaluator") public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEval { @@ -61,7 +61,7 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv public void updateTiTv(VariantContext vc, boolean updateStandard) { if (vc != null && vc.isSNP() && vc.isBiallelic() && vc.isPolymorphicInSamples()) { - if (VariantContextUtils.isTransition(vc)) { + if ( GATKVariantContextUtils.isTransition(vc)) { if (updateStandard) nTiInComp++; else nTi++; } else { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java index 2a1dbd277..48d926115 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java @@ -35,11 +35,11 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.vcf.VCFConstants; import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.variant.variantcontext.Genotype; import org.broadinstitute.variant.variantcontext.VariantContext; -import org.broadinstitute.variant.variantcontext.VariantContextUtils; import java.util.*; @@ -226,7 +226,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval { // type specific calculations if ( type == Type.SNP && eval.isBiallelic() ) { - titvTable = VariantContextUtils.isTransition(eval) ? transitionsPerSample : transversionsPerSample; + titvTable = GATKVariantContextUtils.isTransition(eval) ? transitionsPerSample : transversionsPerSample; titvTable.inc(type, ALL); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index d0d6a68a8..0d87bb921 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -280,7 +280,7 @@ public class CombineVariants extends RodWalker implements Tree List mergedVCs = new ArrayList(); if (multipleAllelesMergeType == GATKVariantContextUtils.MultipleAllelesMergeType.BY_TYPE) { - Map> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs); + Map> VCsByType = GATKVariantContextUtils.separateVariantContextsByType(vcs); // TODO -- clean this up in a refactoring // merge NO_VARIATION into another type of variant (based on the ordering in VariantContext.Type) @@ -320,7 +320,7 @@ public class CombineVariants extends RodWalker implements Tree // re-compute chromosome counts VariantContextUtils.calculateChromosomeCounts(builder, false); if ( minimalVCF ) - VariantContextUtils.pruneVariantContext(builder, Arrays.asList(SET_KEY)); + GATKVariantContextUtils.pruneVariantContext(builder, Arrays.asList(SET_KEY)); vcfWriter.add(builder.make()); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java index b7f88a330..efb84edef 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java @@ -25,11 +25,9 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import com.google.java.contract.Ensures; -import com.google.java.contract.Invariant; import com.google.java.contract.Requires; -import org.broad.tribble.util.ParsingUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.vcf.VCFHeader; @@ -277,8 +275,8 @@ public class ConcordanceMetrics { if ( truth.isMonomorphicInSamples() ) return EVAL_ONLY; - boolean evalSubsetTruth = VariantContextUtils.allelesAreSubset(eval,truth); - boolean truthSubsetEval = VariantContextUtils.allelesAreSubset(truth,eval); + boolean evalSubsetTruth = GATKVariantContextUtils.allelesAreSubset(eval, truth); + boolean truthSubsetEval = GATKVariantContextUtils.allelesAreSubset(truth, eval); if ( evalSubsetTruth && truthSubsetEval ) return ALLELES_MATCH; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index 3ef9fa3d7..b78038953 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -40,12 +40,12 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.variant.variantcontext.VariantContextBuilder; -import org.broadinstitute.variant.variantcontext.VariantContextUtils; import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.variant.variantcontext.writer.VariantContextWriterFactory; @@ -122,7 +122,7 @@ public class LiftoverVariants extends RodWalker { if ( toInterval != null ) { // check whether the strand flips, and if so reverse complement everything if ( fromInterval.isPositiveStrand() != toInterval.isPositiveStrand() && vc.isPointEvent() ) { - vc = VariantContextUtils.reverseComplement(vc); + vc = GATKVariantContextUtils.reverseComplement(vc); } vc = new VariantContextBuilder(vc).loc(toInterval.getSequence(), toInterval.getStart(), toInterval.getStart() + length).make(); @@ -133,7 +133,7 @@ public class LiftoverVariants extends RodWalker { .attribute("OriginalStart", fromInterval.getStart()).make(); } - if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(vc) ) { + if ( originalVC.isSNP() && originalVC.isBiallelic() && GATKVariantContextUtils.getSNPSubstitutionType(originalVC) != GATKVariantContextUtils.getSNPSubstitutionType(vc) ) { logger.warn(String.format("VCF at %s / %d => %s / %d is switching substitution type %s/%s to %s/%s", originalVC.getChr(), originalVC.getStart(), vc.getChr(), vc.getStart(), originalVC.getReference(), originalVC.getAlternateAllele(0), vc.getReference(), vc.getAlternateAllele(0))); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index 1ea85df47..f6c02592d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -41,7 +41,6 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.variant.variantcontext.VariantContextUtils; import java.io.PrintStream; import java.lang.reflect.Array; @@ -432,7 +431,7 @@ public class VariantsToTable extends RodWalker { getters.put("QUAL", new Getter() { public String get(VariantContext vc) { return Double.toString(vc.getPhredScaledQual()); } }); getters.put("TRANSITION", new Getter() { public String get(VariantContext vc) { if ( vc.isSNP() && vc.isBiallelic() ) - return VariantContextUtils.isTransition(vc) ? "1" : "0"; + return GATKVariantContextUtils.isTransition(vc) ? "1" : "0"; else return "-1"; }}); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index 5afeccffe..a51014114 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -39,11 +39,12 @@ import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.Window; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.hapmap.RawHapMapFeature; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; @@ -246,7 +247,7 @@ public class VariantsToVCF extends RodWalker { vcfwriter.writeHeader(new VCFHeader(hInfo, samples)); } - vc = VariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings); + vc = GATKVariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings); vcfwriter.add(vc); } diff --git a/public/java/src/org/broadinstitute/variant/utils/BaseUtils.java b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java similarity index 95% rename from public/java/src/org/broadinstitute/variant/utils/BaseUtils.java rename to public/java/src/org/broadinstitute/sting/utils/BaseUtils.java index 76b49edb9..e6e1db813 100644 --- a/public/java/src/org/broadinstitute/variant/utils/BaseUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java @@ -23,12 +23,14 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.variant.utils; +package org.broadinstitute.sting.utils; import net.sf.samtools.util.StringUtil; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import java.util.Arrays; -import java.util.Random; /** * BaseUtils contains some basic utilities for manipulating nucleotides. @@ -95,9 +97,6 @@ public class BaseUtils { baseIndexWithIupacMap['v'] = Base.N.ordinal(); } - // Use a fixed random seed to allow for deterministic results when using random bases - private static final Random randomNumberGen = new Random(47382911L); - /// In genetics, a transition is a mutation changing a purine to another purine nucleotide (A <-> G) or // a pyrimidine to another pyrimidine nucleotide (C <-> T). // Approximately two out of every three single nucleotide polymorphisms (SNPs) are transitions. @@ -174,7 +173,7 @@ public class BaseUtils { if ( baseIndex == Base.N.ordinal() ) { bases[i] = 'N'; } else if ( errorOnBadReferenceBase && baseIndex == -1 ) { - throw new IllegalStateException("We encountered a non-standard non-IUPAC base in the provided reference: '" + bases[i] + "'"); + throw new UserException.BadInput("We encountered a non-standard non-IUPAC base in the provided reference: '" + bases[i] + "'"); } } return bases; @@ -251,7 +250,7 @@ public class BaseUtils { */ static public int simpleBaseToBaseIndex(final byte base) { if ( base < 0 || base >= 256 ) - throw new IllegalArgumentException("Non-standard bases were encountered in either the input reference or BAM file(s)"); + throw new UserException.BadInput("Non-standard bases were encountered in either the input reference or BAM file(s)"); return baseIndexMap[base]; } @@ -491,7 +490,7 @@ public class BaseUtils { int randomBaseIndex = excludeBaseIndex; while (randomBaseIndex == excludeBaseIndex) { - randomBaseIndex = randomNumberGen.nextInt(4); + randomBaseIndex = GenomeAnalysisEngine.getRandomGenerator().nextInt(4); } return randomBaseIndex; @@ -515,7 +514,7 @@ public class BaseUtils { case 'N': return 'N'; default: - throw new IllegalArgumentException("base must be A, C, G or T. " + (char) base + " is not a valid base."); + throw new ReviewedStingException("base must be A, C, G or T. " + (char) base + " is not a valid base."); } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java index baab1f5fa..6e8a412c3 100644 --- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java @@ -40,7 +40,7 @@ import java.util.*; public class Haplotype extends Allele { private GenomeLoc genomeLocation = null; - private HashMap eventMap = null; + private Map eventMap = null; private Cigar cigar; private int alignmentStartHapwrtRef; public int leftBreakPoint = 0; @@ -81,11 +81,11 @@ public class Haplotype extends Allele { return Arrays.hashCode(getBases()); } - public HashMap getEventMap() { + public Map getEventMap() { return eventMap; } - public void setEventMap( final HashMap eventMap ) { + public void setEventMap( final Map eventMap ) { this.eventMap = eventMap; } diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java index 3f5dffa68..eb287abd8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.utils; +import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMProgramRecord; @@ -34,7 +35,10 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.utils.text.TextFormattingUtils; +import java.math.BigInteger; import java.net.InetAddress; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; import java.util.*; /** @@ -911,4 +915,28 @@ public class Utils { return subLists; } + /** + * @see #calcMD5(byte[]) + */ + public static String calcMD5(final String s) throws NoSuchAlgorithmException { + return calcMD5(s.getBytes()); + } + + /** + * Calculate the md5 for bytes, and return the result as a 32 character string + * + * @param bytes the bytes to calculate the md5 of + * @return the md5 of bytes, as a 32-character long string + * @throws NoSuchAlgorithmException + */ + @Ensures({"result != null", "result.length() == 32"}) + public static String calcMD5(final byte[] bytes) throws NoSuchAlgorithmException { + if ( bytes == null ) throw new IllegalArgumentException("bytes cannot be null"); + final byte[] thedigest = MessageDigest.getInstance("MD5").digest(bytes); + final BigInteger bigInt = new BigInteger(1, thedigest); + + String md5String = bigInt.toString(16); + while (md5String.length() < 32) md5String = "0" + md5String; // pad to length 32 + return md5String; + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java index 13add5e7d..dd6735d89 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java @@ -67,7 +67,7 @@ public class ActiveRegion implements HasGenomeLocation { * The reads included in this active region. May be empty upon creation, and expand / contract * as reads are added or removed from this region. */ - private final ArrayList reads = new ArrayList(); + private final List reads = new ArrayList(); /** * An ordered list (by genomic coordinate) of the ActivityProfileStates that went @@ -355,7 +355,7 @@ public class ActiveRegion implements HasGenomeLocation { * read coordinates. */ public void hardClipToActiveRegion() { - final ArrayList clippedReads = ReadClipper.hardClipToRegion( reads, extendedLoc.getStart(), extendedLoc.getStop() ); + final List clippedReads = ReadClipper.hardClipToRegion( reads, extendedLoc.getStart(), extendedLoc.getStop() ); ReadUtils.sortReadsByCoordinate(clippedReads); clearReads(); addAll(clippedReads); diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java index 87526545d..45dd55af7 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java @@ -362,8 +362,8 @@ public class ReadClipper { return GATKSAMRecord.emptyRead(read); } - public static ArrayList hardClipToRegion( final ArrayList reads, final int refStart, final int refStop ) { - final ArrayList returnList = new ArrayList( reads.size() ); + public static List hardClipToRegion( final List reads, final int refStart, final int refStop ) { + final List returnList = new ArrayList( reads.size() ); for( final GATKSAMRecord read : reads ) { final GATKSAMRecord clippedRead = hardClipToRegion( read, refStart, refStop ); if( !clippedRead.isEmpty() ) { diff --git a/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java b/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java index 39f5b06c6..c78294505 100644 --- a/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java @@ -25,7 +25,7 @@ package org.broadinstitute.sting.utils.duplicates; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.QualityUtils; diff --git a/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java b/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java index a749625cd..c30ac4f7f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java +++ b/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java @@ -33,7 +33,7 @@ import net.sf.samtools.SAMSequenceRecord; import net.sf.samtools.util.StringUtil; import org.apache.log4j.Priority; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import java.io.File; import java.io.FileNotFoundException; diff --git a/public/java/src/org/broadinstitute/sting/utils/genotyper/DiploidGenotype.java b/public/java/src/org/broadinstitute/sting/utils/genotyper/DiploidGenotype.java index febc62716..ceae4bb47 100644 --- a/public/java/src/org/broadinstitute/sting/utils/genotyper/DiploidGenotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/genotyper/DiploidGenotype.java @@ -25,7 +25,7 @@ package org.broadinstitute.sting.utils.genotyper; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; public enum DiploidGenotype { AA ('A', 'A'), diff --git a/public/java/src/org/broadinstitute/sting/utils/io/IOUtils.java b/public/java/src/org/broadinstitute/sting/utils/io/IOUtils.java index c25ecb49b..59466dad3 100644 --- a/public/java/src/org/broadinstitute/sting/utils/io/IOUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/io/IOUtils.java @@ -359,19 +359,9 @@ public class IOUtils { */ public static void writeResource(Resource resource, File file) { String path = resource.getPath(); - Class clazz = resource.getRelativeClass(); - InputStream inputStream = null; + InputStream inputStream = resource.getResourceContentsAsStream(); OutputStream outputStream = null; try { - if (clazz == null) { - inputStream = ClassLoader.getSystemResourceAsStream(path); - if (inputStream == null) - throw new IllegalArgumentException("Resource not found: " + path); - } else { - inputStream = clazz.getResourceAsStream(path); - if (inputStream == null) - throw new IllegalArgumentException("Resource not found relative to " + clazz + ": " + path); - } outputStream = FileUtils.openOutputStream(file); org.apache.commons.io.IOUtils.copy(inputStream, outputStream); } catch (IOException e) { diff --git a/public/java/src/org/broadinstitute/sting/utils/io/Resource.java b/public/java/src/org/broadinstitute/sting/utils/io/Resource.java index 1f181a826..85ca5ce1c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/io/Resource.java +++ b/public/java/src/org/broadinstitute/sting/utils/io/Resource.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.utils.io; import java.io.File; +import java.io.InputStream; /** * Stores a resource by path and a relative class. @@ -64,4 +65,27 @@ public class Resource { File.separator, path); } + + /** + * Get the contents of this resource as an InputStream + * @throws IllegalArgumentException if resource cannot be read + * @return an input stream that will read the contents of this resource + */ + public InputStream getResourceContentsAsStream() { + final Class clazz = getRelativeClass(); + + final InputStream inputStream; + if (clazz == null) { + inputStream = ClassLoader.getSystemResourceAsStream(path); + if (inputStream == null) + throw new IllegalArgumentException("Resource not found: " + path); + } else { + inputStream = clazz.getResourceAsStream(path); + if (inputStream == null) + throw new IllegalArgumentException("Resource not found relative to " + clazz + ": " + path); + + } + + return inputStream; + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java index 5a5358208..51753ca5e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java @@ -29,7 +29,7 @@ import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java index fe43f85bd..65c47c23b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java @@ -32,7 +32,7 @@ import org.broadinstitute.sting.utils.fragments.FragmentCollection; import org.broadinstitute.sting.utils.fragments.FragmentUtils; import org.broadinstitute.sting.utils.locusiterator.LocusIteratorByState; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java index b7a813ec2..9c5f95472 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java @@ -30,7 +30,7 @@ import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 29f8c8dcd..39d058aea 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.NGSPlatform; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import java.io.File; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java index 2ae289214..b0f3cc5fe 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java @@ -30,10 +30,7 @@ import org.apache.commons.lang.ArrayUtils; import org.apache.log4j.Logger; import org.broad.tribble.TribbleException; import org.broad.tribble.util.popgen.HardyWeinbergCalculation; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.vcf.VCFConstants; @@ -53,6 +50,7 @@ public class GATKVariantContextUtils { public final static String MERGE_FILTER_IN_ALL = "FilteredInAll"; public final static String MERGE_INTERSECTION = "Intersection"; + public enum GenotypeMergeType { /** * Make all sample genotypes unique by file. Each sample shared across RODs gets named sample.ROD. @@ -108,6 +106,68 @@ public class GATKVariantContextUtils { return genomeLocParser.createGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd(), true); } + public static BaseUtils.BaseSubstitutionType getSNPSubstitutionType(VariantContext context) { + if (!context.isSNP() || !context.isBiallelic()) + throw new IllegalStateException("Requested SNP substitution type for bialleic non-SNP " + context); + return BaseUtils.SNPSubstitutionType(context.getReference().getBases()[0], context.getAlternateAllele(0).getBases()[0]); + } + + /** + * If this is a BiAlleic SNP, is it a transition? + */ + public static boolean isTransition(VariantContext context) { + return getSNPSubstitutionType(context) == BaseUtils.BaseSubstitutionType.TRANSITION; + } + + /** + * If this is a BiAlleic SNP, is it a transversion? + */ + public static boolean isTransversion(VariantContext context) { + return getSNPSubstitutionType(context) == BaseUtils.BaseSubstitutionType.TRANSVERSION; + } + + public static boolean isTransition(Allele ref, Allele alt) { + return BaseUtils.SNPSubstitutionType(ref.getBases()[0], alt.getBases()[0]) == BaseUtils.BaseSubstitutionType.TRANSITION; + } + + public static boolean isTransversion(Allele ref, Allele alt) { + return BaseUtils.SNPSubstitutionType(ref.getBases()[0], alt.getBases()[0]) == BaseUtils.BaseSubstitutionType.TRANSVERSION; + } + + /** + * Returns a context identical to this with the REF and ALT alleles reverse complemented. + * + * @param vc variant context + * @return new vc + */ + public static VariantContext reverseComplement(VariantContext vc) { + // create a mapping from original allele to reverse complemented allele + HashMap alleleMap = new HashMap(vc.getAlleles().size()); + for ( Allele originalAllele : vc.getAlleles() ) { + Allele newAllele; + if ( originalAllele.isNoCall() ) + newAllele = originalAllele; + else + newAllele = Allele.create(BaseUtils.simpleReverseComplement(originalAllele.getBases()), originalAllele.isReference()); + alleleMap.put(originalAllele, newAllele); + } + + // create new Genotype objects + GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples()); + for ( final Genotype genotype : vc.getGenotypes() ) { + List newAlleles = new ArrayList(); + for ( Allele allele : genotype.getAlleles() ) { + Allele newAllele = alleleMap.get(allele); + if ( newAllele == null ) + newAllele = Allele.NO_CALL; + newAlleles.add(newAllele); + } + newGenotypes.add(new GenotypeBuilder(genotype).alleles(newAlleles).make()); + } + + return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).make(); + } + /** * Returns true iff VC is an non-complex indel where every allele represents an expansion or * contraction of a series of identical bases in the reference. @@ -954,6 +1014,124 @@ public class GATKVariantContextUtils { } } + private final static Map subsetAttributes(final CommonInfo igc, final Collection keysToPreserve) { + Map attributes = new HashMap(keysToPreserve.size()); + for ( final String key : keysToPreserve ) { + if ( igc.hasAttribute(key) ) + attributes.put(key, igc.getAttribute(key)); + } + return attributes; + } + + /** + * @deprecated use variant context builder version instead + * @param vc the variant context + * @param keysToPreserve the keys to preserve + * @return a pruned version of the original variant context + */ + @Deprecated + public static VariantContext pruneVariantContext(final VariantContext vc, Collection keysToPreserve ) { + return pruneVariantContext(new VariantContextBuilder(vc), keysToPreserve).make(); + } + + public static VariantContextBuilder pruneVariantContext(final VariantContextBuilder builder, Collection keysToPreserve ) { + final VariantContext vc = builder.make(); + if ( keysToPreserve == null ) keysToPreserve = Collections.emptyList(); + + // VC info + final Map attributes = subsetAttributes(vc.getCommonInfo(), keysToPreserve); + + // Genotypes + final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples()); + for ( final Genotype g : vc.getGenotypes() ) { + final GenotypeBuilder gb = new GenotypeBuilder(g); + // remove AD, DP, PL, and all extended attributes, keeping just GT and GQ + gb.noAD().noDP().noPL().noAttributes(); + genotypes.add(gb.make()); + } + + return builder.genotypes(genotypes).attributes(attributes); + } + + public static boolean allelesAreSubset(VariantContext vc1, VariantContext vc2) { + // if all alleles of vc1 are a contained in alleles of vc2, return true + if (!vc1.getReference().equals(vc2.getReference())) + return false; + + for (Allele a :vc1.getAlternateAlleles()) { + if (!vc2.getAlternateAlleles().contains(a)) + return false; + } + + return true; + } + + public static Map> separateVariantContextsByType(Collection VCs) { + HashMap> mappedVCs = new HashMap>(); + for ( VariantContext vc : VCs ) { + + // look at previous variant contexts of different type. If: + // a) otherVC has alleles which are subset of vc, remove otherVC from its list and add otherVC to vc's list + // b) vc has alleles which are subset of otherVC. Then, add vc to otherVC's type list (rather, do nothing since vc will be added automatically to its list) + // c) neither: do nothing, just add vc to its own list + boolean addtoOwnList = true; + for (VariantContext.Type type : VariantContext.Type.values()) { + if (type.equals(vc.getType())) + continue; + + if (!mappedVCs.containsKey(type)) + continue; + + List vcList = mappedVCs.get(type); + for (int k=0; k < vcList.size(); k++) { + VariantContext otherVC = vcList.get(k); + if (allelesAreSubset(otherVC,vc)) { + // otherVC has a type different than vc and its alleles are a subset of vc: remove otherVC from its list and add it to vc's type list + vcList.remove(k); + // avoid having empty lists + if (vcList.size() == 0) + mappedVCs.remove(type); + if ( !mappedVCs.containsKey(vc.getType()) ) + mappedVCs.put(vc.getType(), new ArrayList()); + mappedVCs.get(vc.getType()).add(otherVC); + break; + } + else if (allelesAreSubset(vc,otherVC)) { + // vc has a type different than otherVC and its alleles are a subset of VC: add vc to otherVC's type list and don't add to its own + mappedVCs.get(type).add(vc); + addtoOwnList = false; + break; + } + } + } + if (addtoOwnList) { + if ( !mappedVCs.containsKey(vc.getType()) ) + mappedVCs.put(vc.getType(), new ArrayList()); + mappedVCs.get(vc.getType()).add(vc); + } + } + + return mappedVCs; + } + + public static VariantContext purgeUnallowedGenotypeAttributes(VariantContext vc, Set allowedAttributes) { + if ( allowedAttributes == null ) + return vc; + + GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples()); + for ( final Genotype genotype : vc.getGenotypes() ) { + Map attrs = new HashMap(); + for ( Map.Entry attr : genotype.getExtendedAttributes().entrySet() ) { + if ( allowedAttributes.contains(attr.getKey()) ) + attrs.put(attr.getKey(), attr.getValue()); + } + newGenotypes.add(new GenotypeBuilder(genotype).attributes(attrs).make()); + } + + return new VariantContextBuilder(vc).genotypes(newGenotypes).make(); + } + + private static class AlleleMapper { private VariantContext vc = null; private Map map = null; diff --git a/public/java/src/org/broadinstitute/variant/variantcontext/Allele.java b/public/java/src/org/broadinstitute/variant/variantcontext/Allele.java index 0a0b4d0b7..e0a6495a5 100644 --- a/public/java/src/org/broadinstitute/variant/variantcontext/Allele.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/Allele.java @@ -25,7 +25,7 @@ package org.broadinstitute.variant.variantcontext; -import org.broadinstitute.variant.utils.BaseUtils; +import net.sf.samtools.util.StringUtil; import java.util.Arrays; import java.util.Collection; @@ -130,7 +130,7 @@ public class Allele implements Comparable { if ( isRef ) throw new IllegalArgumentException("Cannot tag a symbolic allele as the reference allele"); } else { - BaseUtils.convertToUpperCase(bases); + StringUtil.toUpperCase(bases); } this.isRef = isRef; diff --git a/public/java/src/org/broadinstitute/variant/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/variant/variantcontext/VariantContext.java index 05edee153..1fce89431 100644 --- a/public/java/src/org/broadinstitute/variant/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/VariantContext.java @@ -625,6 +625,10 @@ public class VariantContext implements Feature { // to enable tribble integratio public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); } public boolean getAttributeAsBoolean(String key, boolean defaultValue) { return commonInfo.getAttributeAsBoolean(key, defaultValue); } + public CommonInfo getCommonInfo() { + return commonInfo; + } + // --------------------------------------------------------------------------------------------------------- // // Working with alleles diff --git a/public/java/src/org/broadinstitute/variant/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/variant/variantcontext/VariantContextUtils.java index fa2b5c9e5..a5b7b6c04 100644 --- a/public/java/src/org/broadinstitute/variant/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/VariantContextUtils.java @@ -30,7 +30,6 @@ import com.google.java.contract.Requires; import org.apache.commons.jexl2.Expression; import org.apache.commons.jexl2.JexlEngine; import org.broad.tribble.TribbleException; -import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.variant.utils.GeneralUtils; import org.broadinstitute.variant.vcf.*; @@ -317,106 +316,6 @@ public class VariantContextUtils { return r; } - private final static Map subsetAttributes(final CommonInfo igc, final Collection keysToPreserve) { - Map attributes = new HashMap(keysToPreserve.size()); - for ( final String key : keysToPreserve ) { - if ( igc.hasAttribute(key) ) - attributes.put(key, igc.getAttribute(key)); - } - return attributes; - } - - /** - * @deprecated use variant context builder version instead - * @param vc the variant context - * @param keysToPreserve the keys to preserve - * @return a pruned version of the original variant context - */ - @Deprecated - public static VariantContext pruneVariantContext(final VariantContext vc, Collection keysToPreserve ) { - return pruneVariantContext(new VariantContextBuilder(vc), keysToPreserve).make(); - } - - public static VariantContextBuilder pruneVariantContext(final VariantContextBuilder builder, Collection keysToPreserve ) { - final VariantContext vc = builder.make(); - if ( keysToPreserve == null ) keysToPreserve = Collections.emptyList(); - - // VC info - final Map attributes = subsetAttributes(vc.commonInfo, keysToPreserve); - - // Genotypes - final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples()); - for ( final Genotype g : vc.getGenotypes() ) { - final GenotypeBuilder gb = new GenotypeBuilder(g); - // remove AD, DP, PL, and all extended attributes, keeping just GT and GQ - gb.noAD().noDP().noPL().noAttributes(); - genotypes.add(gb.make()); - } - - return builder.genotypes(genotypes).attributes(attributes); - } - - public static boolean allelesAreSubset(VariantContext vc1, VariantContext vc2) { - // if all alleles of vc1 are a contained in alleles of vc2, return true - if (!vc1.getReference().equals(vc2.getReference())) - return false; - - for (Allele a :vc1.getAlternateAlleles()) { - if (!vc2.getAlternateAlleles().contains(a)) - return false; - } - - return true; - } - - public static Map> separateVariantContextsByType(Collection VCs) { - HashMap> mappedVCs = new HashMap>(); - for ( VariantContext vc : VCs ) { - - // look at previous variant contexts of different type. If: - // a) otherVC has alleles which are subset of vc, remove otherVC from its list and add otherVC to vc's list - // b) vc has alleles which are subset of otherVC. Then, add vc to otherVC's type list (rather, do nothing since vc will be added automatically to its list) - // c) neither: do nothing, just add vc to its own list - boolean addtoOwnList = true; - for (VariantContext.Type type : VariantContext.Type.values()) { - if (type.equals(vc.getType())) - continue; - - if (!mappedVCs.containsKey(type)) - continue; - - List vcList = mappedVCs.get(type); - for (int k=0; k < vcList.size(); k++) { - VariantContext otherVC = vcList.get(k); - if (allelesAreSubset(otherVC,vc)) { - // otherVC has a type different than vc and its alleles are a subset of vc: remove otherVC from its list and add it to vc's type list - vcList.remove(k); - // avoid having empty lists - if (vcList.size() == 0) - mappedVCs.remove(type); - if ( !mappedVCs.containsKey(vc.getType()) ) - mappedVCs.put(vc.getType(), new ArrayList()); - mappedVCs.get(vc.getType()).add(otherVC); - break; - } - else if (allelesAreSubset(vc,otherVC)) { - // vc has a type different than otherVC and its alleles are a subset of VC: add vc to otherVC's type list and don't add to its own - mappedVCs.get(type).add(vc); - addtoOwnList = false; - break; - } - } - } - if (addtoOwnList) { - if ( !mappedVCs.containsKey(vc.getType()) ) - mappedVCs.put(vc.getType(), new ArrayList()); - mappedVCs.get(vc.getType()).add(vc); - } - } - - return mappedVCs; - } - // TODO: remove that after testing // static private void verifyUniqueSampleNames(Collection unsortedVCs) { // Set names = new HashSet(); @@ -432,85 +331,6 @@ public class VariantContextUtils { // } - /** - * Returns a context identical to this with the REF and ALT alleles reverse complemented. - * - * @param vc variant context - * @return new vc - */ - public static VariantContext reverseComplement(VariantContext vc) { - // create a mapping from original allele to reverse complemented allele - HashMap alleleMap = new HashMap(vc.getAlleles().size()); - for ( Allele originalAllele : vc.getAlleles() ) { - Allele newAllele; - if ( originalAllele.isNoCall() ) - newAllele = originalAllele; - else - newAllele = Allele.create(BaseUtils.simpleReverseComplement(originalAllele.getBases()), originalAllele.isReference()); - alleleMap.put(originalAllele, newAllele); - } - - // create new Genotype objects - GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples()); - for ( final Genotype genotype : vc.getGenotypes() ) { - List newAlleles = new ArrayList(); - for ( Allele allele : genotype.getAlleles() ) { - Allele newAllele = alleleMap.get(allele); - if ( newAllele == null ) - newAllele = Allele.NO_CALL; - newAlleles.add(newAllele); - } - newGenotypes.add(new GenotypeBuilder(genotype).alleles(newAlleles).make()); - } - - return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).make(); - } - - public static VariantContext purgeUnallowedGenotypeAttributes(VariantContext vc, Set allowedAttributes) { - if ( allowedAttributes == null ) - return vc; - - GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples()); - for ( final Genotype genotype : vc.getGenotypes() ) { - Map attrs = new HashMap(); - for ( Map.Entry attr : genotype.getExtendedAttributes().entrySet() ) { - if ( allowedAttributes.contains(attr.getKey()) ) - attrs.put(attr.getKey(), attr.getValue()); - } - newGenotypes.add(new GenotypeBuilder(genotype).attributes(attrs).make()); - } - - return new VariantContextBuilder(vc).genotypes(newGenotypes).make(); - } - - public static BaseUtils.BaseSubstitutionType getSNPSubstitutionType(VariantContext context) { - if (!context.isSNP() || !context.isBiallelic()) - throw new IllegalStateException("Requested SNP substitution type for bialleic non-SNP " + context); - return BaseUtils.SNPSubstitutionType(context.getReference().getBases()[0], context.getAlternateAllele(0).getBases()[0]); - } - - /** - * If this is a BiAlleic SNP, is it a transition? - */ - public static boolean isTransition(VariantContext context) { - return getSNPSubstitutionType(context) == BaseUtils.BaseSubstitutionType.TRANSITION; - } - - /** - * If this is a BiAlleic SNP, is it a transversion? - */ - public static boolean isTransversion(VariantContext context) { - return getSNPSubstitutionType(context) == BaseUtils.BaseSubstitutionType.TRANSVERSION; - } - - public static boolean isTransition(Allele ref, Allele alt) { - return BaseUtils.SNPSubstitutionType(ref.getBases()[0], alt.getBases()[0]) == BaseUtils.BaseSubstitutionType.TRANSITION; - } - - public static boolean isTransversion(Allele ref, Allele alt) { - return BaseUtils.SNPSubstitutionType(ref.getBases()[0], alt.getBases()[0]) == BaseUtils.BaseSubstitutionType.TRANSVERSION; - } - public static int getSize( VariantContext vc ) { return vc.getEnd() - vc.getStart() + 1; } diff --git a/public/java/test/org/broadinstitute/sting/MD5DB.java b/public/java/test/org/broadinstitute/sting/MD5DB.java index aed98b78a..2b0d52a11 100644 --- a/public/java/test/org/broadinstitute/sting/MD5DB.java +++ b/public/java/test/org/broadinstitute/sting/MD5DB.java @@ -28,11 +28,10 @@ package org.broadinstitute.sting; import org.apache.commons.io.FileUtils; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.walkers.diffengine.DiffEngine; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.*; -import java.math.BigInteger; -import java.security.MessageDigest; import java.util.Arrays; /** @@ -252,11 +251,7 @@ public class MD5DB { */ public String testFileMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) { try { - byte[] bytesOfMessage = getBytesFromFile(resultsFile); - byte[] thedigest = MessageDigest.getInstance("MD5").digest(bytesOfMessage); - BigInteger bigInt = new BigInteger(1, thedigest); - String filemd5sum = bigInt.toString(16); - while (filemd5sum.length() < 32) filemd5sum = "0" + filemd5sum; // pad to length 32 + final String filemd5sum = Utils.calcMD5(getBytesFromFile(resultsFile)); // // copy md5 to integrationtests diff --git a/public/java/test/org/broadinstitute/sting/gatk/phonehome/GATKRunReportUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/phonehome/GATKRunReportUnitTest.java new file mode 100644 index 000000000..03f19968c --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/phonehome/GATKRunReportUnitTest.java @@ -0,0 +1,52 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.gatk.phonehome; + +import junit.framework.Assert; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.Utils; +import org.testng.annotations.Test; + +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; + +public class GATKRunReportUnitTest extends BaseTest { + @Test + public void testAccessKey() throws Exception { + testAWSKey(GATKRunReport.getAWSAccessKey(), "c0f0afa1ff5ba41d9bf216cfcdbf26bf"); + } + + @Test + public void testSecretKey() throws Exception { + testAWSKey(GATKRunReport.getAWSSecretKey(), "db2f13b3a7c98ad24e28783733ec4a62"); + } + + private void testAWSKey(final String accessKey, final String expectedMD5) throws Exception { + Assert.assertNotNull(accessKey, "AccessKey should not be null"); + final String actualmd5 = Utils.calcMD5(accessKey); + Assert.assertEquals(actualmd5, expectedMD5); + } +} diff --git a/public/java/test/org/broadinstitute/variant/utils/BaseUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/BaseUtilsUnitTest.java similarity index 93% rename from public/java/test/org/broadinstitute/variant/utils/BaseUtilsUnitTest.java rename to public/java/test/org/broadinstitute/sting/utils/BaseUtilsUnitTest.java index 738fa28e2..cb80dbc09 100644 --- a/public/java/test/org/broadinstitute/variant/utils/BaseUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/BaseUtilsUnitTest.java @@ -23,20 +23,22 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.variant.utils; +package org.broadinstitute.sting.utils; -import org.broadinstitute.variant.VariantBaseTest; +import org.broadinstitute.sting.BaseTest; import org.testng.Assert; import org.testng.annotations.Test; import org.testng.annotations.BeforeClass; -public class BaseUtilsUnitTest extends VariantBaseTest { +public class BaseUtilsUnitTest extends BaseTest { @BeforeClass public void init() { } @Test public void testMostFrequentBaseFraction() { + logger.warn("Executing testMostFrequentBaseFraction"); + compareFrequentBaseFractionToExpected("AAAAA", 1.0); compareFrequentBaseFractionToExpected("ACCG", 0.5); compareFrequentBaseFractionToExpected("ACCCCTTTTG", 4.0/10.0); @@ -44,7 +46,7 @@ public class BaseUtilsUnitTest extends VariantBaseTest { private void compareFrequentBaseFractionToExpected(String sequence, double expected) { double fraction = BaseUtils.mostFrequentBaseFraction(sequence.getBytes()); - Assert.assertTrue(GeneralUtils.compareDoubles(fraction, expected) == 0); + Assert.assertTrue(MathUtils.compareDoubles(fraction, expected) == 0); } @Test @@ -64,6 +66,8 @@ public class BaseUtilsUnitTest extends VariantBaseTest { @Test public void testTransitionTransversion() { + logger.warn("Executing testTransitionTransversion"); + Assert.assertTrue( BaseUtils.SNPSubstitutionType( (byte)'A', (byte)'T' ) == BaseUtils.BaseSubstitutionType.TRANSVERSION ); Assert.assertTrue( BaseUtils.SNPSubstitutionType( (byte)'A', (byte)'C' ) == BaseUtils.BaseSubstitutionType.TRANSVERSION ); Assert.assertTrue( BaseUtils.SNPSubstitutionType( (byte)'A', (byte)'G' ) == BaseUtils.BaseSubstitutionType.TRANSITION ); @@ -89,6 +93,8 @@ public class BaseUtilsUnitTest extends VariantBaseTest { @Test public void testReverseComplementString() { + logger.warn("Executing testReverseComplementString"); + compareRCStringToExpected("ACGGT", "ACCGT"); compareRCStringToExpected("TCGTATATCTCGCTATATATATATAGCTCTAGTATA", "TATACTAGAGCTATATATATATAGCGAGATATACGA"); compareRCStringToExpected("AAAN", "NTTT"); diff --git a/public/java/test/org/broadinstitute/sting/utils/UtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/UtilsUnitTest.java index fc10f1102..5d6ecd0f9 100644 --- a/public/java/test/org/broadinstitute/sting/utils/UtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/UtilsUnitTest.java @@ -25,10 +25,13 @@ package org.broadinstitute.sting.utils; +import org.apache.commons.io.FileUtils; +import org.broadinstitute.sting.utils.io.IOUtils; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; import org.testng.annotations.Test; +import java.io.File; import java.util.LinkedHashMap; import java.util.Map; @@ -135,4 +138,16 @@ public class UtilsUnitTest extends BaseTest { actual = Utils.escapeExpressions(" one two 'three four' "); Assert.assertEquals(actual, expected); } + + @Test + public void testCalcMD5() throws Exception { + final File source = new File(publicTestDir + "exampleFASTA.fasta"); + final String sourceMD5 = "36880691cf9e4178216f7b52e8d85fbe"; + + final byte[] sourceBytes = IOUtils.readFileIntoByteArray(source); + Assert.assertEquals(Utils.calcMD5(sourceBytes), sourceMD5); + + final String sourceString = FileUtils.readFileToString(source); + Assert.assertEquals(Utils.calcMD5(sourceString), sourceMD5); + } } diff --git a/public/java/test/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFileUnitTest.java index b65811103..0c1b5b069 100644 --- a/public/java/test/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFileUnitTest.java @@ -252,7 +252,7 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest { Assert.assertEquals(changingNs, preservingNs + 4); } - @Test(enabled = true, expectedExceptions = {IllegalStateException.class}) + @Test(enabled = true, expectedExceptions = {UserException.class}) public void testFailOnBadBase() throws FileNotFoundException, InterruptedException { final String testFasta = privateTestDir + "problematicFASTA.fasta"; final CachingIndexedFastaSequenceFile fasta = new CachingIndexedFastaSequenceFile(new File(testFasta)); diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java index 20971643e..b01c53e77 100644 --- a/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java @@ -27,7 +27,7 @@ package org.broadinstitute.sting.utils.sam; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; diff --git a/public/java/test/org/broadinstitute/variant/variantcontext/GenotypeLikelihoodsUnitTest.java b/public/java/test/org/broadinstitute/variant/variantcontext/GenotypeLikelihoodsUnitTest.java index fb180c476..562130101 100644 --- a/public/java/test/org/broadinstitute/variant/variantcontext/GenotypeLikelihoodsUnitTest.java +++ b/public/java/test/org/broadinstitute/variant/variantcontext/GenotypeLikelihoodsUnitTest.java @@ -31,7 +31,6 @@ package org.broadinstitute.variant.variantcontext; import org.broad.tribble.TribbleException; import org.broadinstitute.variant.VariantBaseTest; -import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.variant.utils.GeneralUtils; import org.testng.Assert; import org.testng.annotations.Test; @@ -154,9 +153,9 @@ public class GenotypeLikelihoodsUnitTest extends VariantBaseTest { public void testGetQualFromLikelihoodsMultiAllelic() { GenotypeLikelihoods gl = GenotypeLikelihoods.fromLog10Likelihoods(triAllelic); - Allele ref = Allele.create(BaseUtils.Base.A.base,true); - Allele alt1 = Allele.create(BaseUtils.Base.C.base); - Allele alt2 = Allele.create(BaseUtils.Base.T.base); + Allele ref = Allele.create((byte)'A',true); + Allele alt1 = Allele.create((byte)'C'); + Allele alt2 = Allele.create((byte)'T'); List allAlleles = Arrays.asList(ref,alt1,alt2); List gtAlleles = Arrays.asList(alt1,alt2); GenotypeBuilder gtBuilder = new GenotypeBuilder();