diff --git a/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContextUtils.java b/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContextUtils.java index 4e75f3ddb..d589f9029 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContextUtils.java @@ -131,7 +131,7 @@ public class AlignmentContextUtils { } } - public static Map splitContextBySampleName(ReadBackedPileup pileup, String assumedSingleSample) { + public static Map splitContextBySampleName(ReadBackedPileup pileup) { return splitContextBySampleName(new AlignmentContext(pileup.getLocation(), pileup)); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java index 6cab6d95f..312b505ec 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java @@ -14,7 +14,8 @@ import java.util.List; /** - * The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for base qualities (ref bases vs. bases of the alternate allele) + * The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for base qualities (ref bases vs. bases of the alternate allele). + * Note that the base quality rank sum test can not be calculated for homozygous sites. */ public class BaseQualityRankSumTest extends RankSumTest { public List getKeyNames() { return Arrays.asList("BaseQRankSum"); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index 2d1d1978c..c4025a25c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -46,7 +46,8 @@ import java.util.*; /** * Phred-scaled p-value using Fisher's Exact Test to detect strand bias (the variation * being seen on only the forward or only the reverse strand) in the reads? More bias is - * indicative of false positive calls. + * indicative of false positive calls. Note that the fisher strand test may not be + * calculated for certain complex indel cases or for multi-allelic sites. */ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation { private static final String FS = "FS"; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java index c142109fa..94b0636f4 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java @@ -52,6 +52,8 @@ import java.util.*; /** * Consistency of the site with two (and only two) segregating haplotypes. Higher scores * are indicative of regions with bad alignments, often leading to artifactual SNP and indel calls. + * Note that the Haplotype Score is only calculated for sites with read coverage; also, for SNPs, the + * site must be bi-allelic. */ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation { private final static boolean DEBUG = false; @@ -180,12 +182,12 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot final Haplotype haplotype1 = consensusHaplotypeQueue.poll(); Listhlist = new ArrayList(); - hlist.add(new Haplotype(haplotype1.getBasesAsBytes(), 60)); + hlist.add(new Haplotype(haplotype1.getBases(), 60)); for (int k=1; k < haplotypesToCompute; k++) { Haplotype haplotype2 = consensusHaplotypeQueue.poll(); if(haplotype2 == null ) { haplotype2 = haplotype1; } // Sometimes only the reference haplotype can be found - hlist.add(new Haplotype(haplotype2.getBasesAsBytes(), 20)); + hlist.add(new Haplotype(haplotype2.getBases(), 20)); } return hlist; } else @@ -229,8 +231,8 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot } private Haplotype getConsensusHaplotype(final Haplotype haplotypeA, final Haplotype haplotypeB) { - final byte[] a = haplotypeA.getBasesAsBytes(); - final byte[] b = haplotypeB.getBasesAsBytes(); + final byte[] a = haplotypeA.getBases(); + final byte[] b = haplotypeB.getBases(); if (a.length != b.length) { throw new ReviewedStingException("Haplotypes a and b must be of same length"); @@ -313,7 +315,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot // actually be a miscall in a matching direction, which would happen at a e / 3 rate. If b != c, then // the chance that it is actually a mismatch is 1 - e, since any of the other 3 options would be a mismatch. // so the probability-weighted mismatch rate is sum_i ( matched ? e_i / 3 : 1 - e_i ) for i = 1 ... n - final byte[] haplotypeBases = haplotype.getBasesAsBytes(); + final byte[] haplotypeBases = haplotype.getBases(); final SAMRecord read = p.getRead(); byte[] readBases = read.getReadBases(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java index a14007147..8728e5aa4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java @@ -23,7 +23,8 @@ import java.util.Map; * * A continuous generalization of the Hardy-Weinberg test for disequilibrium that works * well with limited coverage per sample. See the 1000 Genomes Phase I release for - * more information. + * more information. Note that the Inbreeding Coefficient will not be calculated for files + * with fewer than a minimum (generally 10) number of samples. */ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java index 157c615d7..9857c339f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java @@ -16,6 +16,7 @@ import java.util.List; /** * The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for mapping qualities (reads with ref bases vs. those with the alternate allele) + * Note that the mapping quality rank sum test can not be calculated for homozygous sites. */ public class MappingQualityRankSumTest extends RankSumTest { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java index ffc852903..b942d9817 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java @@ -19,7 +19,8 @@ import java.util.Map; /** * Variant confidence (given as (AB+BB)/AA from the PLs) / unfiltered depth. * - * Low scores are indicative of false positive calls and artifacts. + * Low scores are indicative of false positive calls and artifacts. Note that QualByDepth requires sequencing + * reads associated with the samples with polymorphic genotypes. */ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java index 27a9306d4..d762af428 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java @@ -20,6 +20,7 @@ import java.util.List; /** * The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for the distance from the end of the read for reads with the alternate allele; if the alternate allele is only seen near the ends of reads this is indicative of error). + * Note that the read position rank sum test can not be calculated for homozygous sites. */ public class ReadPosRankSumTest extends RankSumTest { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index c9937f3d6..8f4bc0abd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -164,10 +164,6 @@ public class VariantAnnotator extends RodWalker implements Ann @Argument(fullName="list", shortName="ls", doc="List the available annotations and exit") protected Boolean LIST = false; - @Hidden - @Argument(fullName = "assume_single_sample_reads", shortName = "single_sample", doc = "The single sample that we should assume is represented in the input bam (and therefore associate with all reads regardless of whether they have read groups)", required = false) - protected String ASSUME_SINGLE_SAMPLE = null; - @Hidden @Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false) protected boolean indelsOnly = false; @@ -213,11 +209,6 @@ public class VariantAnnotator extends RodWalker implements Ann List rodName = Arrays.asList(variantCollection.variants.getName()); Set samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName); - // if there are no valid samples, warn the user - if ( samples.size() == 0 ) { - logger.warn("There are no samples input at all; use the --sampleName argument to specify one if desired."); - } - if ( USE_ALL_ANNOTATIONS ) engine = new VariantAnnotatorEngine(annotationsToExclude, this, getToolkit()); else @@ -301,9 +292,9 @@ public class VariantAnnotator extends RodWalker implements Ann Map stratifiedContexts; if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 ) { if ( ! context.hasExtendedEventPileup() ) { - stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getBasePileup(), ASSUME_SINGLE_SAMPLE); + stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getBasePileup()); } else { - stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getExtendedEventPileup(), ASSUME_SINGLE_SAMPLE); + stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getExtendedEventPileup()); } if ( stratifiedContexts != null ) { annotatedVCs = new ArrayList(VCs.size()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java index 503d87cbe..c7e577393 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java @@ -39,7 +39,6 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.HashSet; import java.util.Set; -import java.util.TreeSet; /** @@ -71,12 +70,7 @@ public class UGCalcLikelihoods extends LocusWalker public void initialize() { // get all of the unique sample names - // if we're supposed to assume a single sample, do so - Set samples = new TreeSet(); - if ( UAC.ASSUME_SINGLE_SAMPLE != null ) - samples.add(UAC.ASSUME_SINGLE_SAMPLE); - else - samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); + Set samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 07d9892a1..62218416d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -96,11 +96,6 @@ public class UnifiedArgumentCollection { @Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when in GENOTYPE_MODE = GENOTYPE_GIVEN_ALLELES", required=false) public RodBinding alleles; - // control the error modes - @Hidden - @Argument(fullName = "assume_single_sample_reads", shortName = "single_sample", doc = "The single sample that we should assume is represented in the input bam (and therefore associate with all reads regardless of whether they have read groups)", required = false) - public String ASSUME_SINGLE_SAMPLE = null; - /** * The minimum confidence needed in a given base for it to be used in variant calling. Note that the base quality of a base * is capped by the mapping quality so that bases on reads with low mapping quality may get filtered out depending on this value. @@ -170,7 +165,6 @@ public class UnifiedArgumentCollection { uac.GenotypingMode = GenotypingMode; uac.OutputMode = OutputMode; uac.COMPUTE_SLOD = COMPUTE_SLOD; - uac.ASSUME_SINGLE_SAMPLE = ASSUME_SINGLE_SAMPLE; uac.STANDARD_CONFIDENCE_FOR_CALLING = STANDARD_CONFIDENCE_FOR_CALLING; uac.STANDARD_CONFIDENCE_FOR_EMITTING = STANDARD_CONFIDENCE_FOR_EMITTING; uac.MIN_BASE_QUALTY_SCORE = MIN_BASE_QUALTY_SCORE; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 72dc217e1..bdd4e2c65 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -206,12 +206,7 @@ public class UnifiedGenotyper extends LocusWalker samples = new TreeSet(); - if ( UAC.ASSUME_SINGLE_SAMPLE != null ) - samples.add(UAC.ASSUME_SINGLE_SAMPLE); - else - samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); + Set samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); // initialize the verbose writer if ( verboseWriter != null ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 993a434ac..cee128a6a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -106,12 +106,7 @@ public class UnifiedGenotyperEngine { // --------------------------------------------------------------------------------------------------------- @Requires({"toolkit != null", "UAC != null"}) public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) { - this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, - // get the number of samples - // if we're supposed to assume a single sample, do so - UAC.ASSUME_SINGLE_SAMPLE != null ? - new TreeSet(Arrays.asList(UAC.ASSUME_SINGLE_SAMPLE)) : - SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader())); + this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader())); } @Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0"}) @@ -253,7 +248,7 @@ public class UnifiedGenotyperEngine { pileup = rawContext.getExtendedEventPileup(); else if (rawContext.hasBasePileup()) pileup = rawContext.getBasePileup(); - stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE); + stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup); vc = annotationEngine.annotateContext(tracker, ref, stratifiedContexts, vc); } @@ -435,7 +430,7 @@ public class UnifiedGenotyperEngine { pileup = rawContext.getExtendedEventPileup(); else if (rawContext.hasBasePileup()) pileup = rawContext.getBasePileup(); - stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE); + stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup); vcCall = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall); } @@ -569,7 +564,7 @@ public class UnifiedGenotyperEngine { return null; // stratify the AlignmentContext and cut by sample - stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE); + stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup); } else { @@ -586,12 +581,12 @@ public class UnifiedGenotyperEngine { return null; // stratify the AlignmentContext and cut by sample - stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE); + stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup); } } else if ( model == GenotypeLikelihoodsCalculationModel.Model.SNP ) { // stratify the AlignmentContext and cut by sample - stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup(), UAC.ASSUME_SINGLE_SAMPLE); + stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup()); if( !(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) ) { int numDeletions = 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java index 3b3f54b05..200a250f2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java @@ -205,7 +205,7 @@ public class HaplotypeIndelErrorModel { byte haplotypeBase; if (haplotypeIndex < RIGHT_ALIGN_INDEX) - haplotypeBase = haplotype.getBasesAsBytes()[haplotypeIndex]; + haplotypeBase = haplotype.getBases()[haplotypeIndex]; else haplotypeBase = (byte)0; // dummy @@ -217,7 +217,7 @@ public class HaplotypeIndelErrorModel { if (readQual > 3) pRead += pBaseRead; haplotypeIndex++; - if (haplotypeIndex >= haplotype.getBasesAsBytes().length) + if (haplotypeIndex >= haplotype.getBases().length) haplotypeIndex = RIGHT_ALIGN_INDEX; //System.out.format("H:%c R:%c RQ:%d HI:%d %4.5f %4.5f\n", haplotypeBase, readBase, (int)readQual, haplotypeIndex, pBaseRead, pRead); } @@ -227,8 +227,8 @@ public class HaplotypeIndelErrorModel { System.out.println(read.getReadName()); System.out.print("Haplotype:"); - for (int k=0; k LEFT_ALIGN_INDEX && indX < RIGHT_ALIGN_INDEX) - haplotypeBase = haplotype.getBasesAsBytes()[indX-1]; + haplotypeBase = haplotype.getBases()[indX-1]; else haplotypeBase = readBase; @@ -296,8 +296,8 @@ public class HaplotypeIndelErrorModel { System.out.println(read.getReadName()); System.out.print("Haplotype:"); - for (int k=0; k { // For now, we will just arbitrarily add 10 to the mapping quality. [EB, 6/7/2010]. // TODO -- we need a better solution here GATKSAMRecord read = aRead.getRead(); - read.setMappingQuality(Math.min(aRead.getRead().getMappingQuality() + 10, 254)); + if ( read.getMappingQuality() != 255 ) // 255 == Unknown, so don't modify it + read.setMappingQuality(Math.min(aRead.getRead().getMappingQuality() + 10, 254)); // before we fix the attribute tags we first need to make sure we have enough of the reference sequence int neededBasesToLeft = leftmostIndex - read.getAlignmentStart(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 11ff99ce3..b28980d9c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -381,7 +381,7 @@ public class PairHMMIndelErrorModel { // todo -- refactor into separate function for (Allele a: haplotypeMap.keySet()) { Haplotype haplotype = haplotypeMap.get(a); - byte[] haplotypeBases = haplotype.getBasesAsBytes(); + byte[] haplotypeBases = haplotype.getBases(); double[] contextLogGapOpenProbabilities = new double[haplotypeBases.length]; double[] contextLogGapContinuationProbabilities = new double[haplotypeBases.length]; @@ -555,7 +555,7 @@ public class PairHMMIndelErrorModel { long indStart = start - haplotype.getStartPosition(); long indStop = stop - haplotype.getStartPosition(); - byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBasesAsBytes(), + byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBases(), (int)indStart, (int)indStop); double readLikelihood; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java deleted file mode 100644 index 306509d0c..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java +++ /dev/null @@ -1,118 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.phasing; - -import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.commandline.RodBinding; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; - -import java.util.*; - -/** - * Merges read-back-phased and phase-by-transmission files. - */ -public class MergeAndMatchHaplotypes extends RodWalker { - @Output - protected VCFWriter vcfWriter = null; - - @Input(fullName="pbt", shortName = "pbt", doc="Input VCF truth file", required=true) - public RodBinding pbtTrack; - - @Input(fullName="rbp", shortName = "rbp", doc="Input VCF truth file", required=true) - public RodBinding rbpTrack; - - private Map pbtCache = new HashMap(); - private Map rbpCache = new HashMap(); - - private final String SOURCE_NAME = "MergeReadBackedAndTransmissionPhasedVariants"; - - public void initialize() { - ArrayList rodNames = new ArrayList(); - rodNames.add(pbtTrack.getName()); - - Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); - Set vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); - Set headerLines = new HashSet(); - headerLines.addAll(VCFUtils.getHeaderFields(this.getToolkit())); - - vcfWriter.writeHeader(new VCFHeader(headerLines, vcfSamples)); - } - - @Override - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if (tracker != null) { - Collection pbts = tracker.getValues(pbtTrack, ref.getLocus()); - Collection rbps = tracker.getValues(rbpTrack, ref.getLocus()); - - VariantContext pbt = pbts.iterator().hasNext() ? pbts.iterator().next() : null; - VariantContext rbp = rbps.iterator().hasNext() ? rbps.iterator().next() : null; - - if (pbt != null && rbp != null) { - Map genotypes = pbt.getGenotypes(); - - if (!rbp.isFiltered()) { - for (String sample : rbp.getSampleNames()) { - Genotype rbpg = rbp.getGenotype(sample); - Genotype pbtg = pbt.getGenotype(sample); - - // Propagate read-backed phasing information to genotypes unphased by transmission - //if (!pbtg.isPhased() && rbpCache.containsKey(sample)) { - if (!pbtg.isPhased() && rbpg.isPhased() && rbpCache.containsKey(sample)) { - boolean orientationMatches = rbpCache.get(sample).sameGenotype(pbtCache.get(sample), false); - - if (orientationMatches) { - pbtg = rbpg; - } else { - List fwdAlleles = rbpg.getAlleles(); - List revAlleles = new ArrayList(); - - for (int i = fwdAlleles.size() - 1; i >= 0; i--) { - revAlleles.add(fwdAlleles.get(i)); - } - - pbtg = new Genotype(sample, revAlleles, rbpg.getNegLog10PError(), rbpg.getFilters(), rbpg.getAttributes(), rbpg.isPhased()); - } - } - - genotypes.put(sample, pbtg); - - // Update the cache - if (/*rbpg.isPhased() &&*/ rbpg.isHet()) { - rbpCache.put(sample, rbpg); - pbtCache.put(sample, pbtg); - } else if (!rbpg.isPhased()) { - rbpCache.remove(sample); - pbtCache.remove(sample); - } - } - } - - VariantContext newvc = new VariantContext(SOURCE_NAME, pbt.getChr(), pbt.getStart(), pbt.getStart(), pbt.getAlleles(), genotypes, pbt.getNegLog10PError(), pbt.getFilters(), pbt.getAttributes()); - vcfWriter.add(newvc); - } - } - - return null; - } - - @Override - public Integer reduceInit() { - return null; - } - - @Override - public Integer reduce(Integer value, Integer sum) { - return null; - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java index ce2ca2c28..df682f215 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java @@ -33,8 +33,8 @@ import java.util.LinkedHashMap; import java.util.List; public class Haplotype { - protected byte[] bases = null; - protected double[] quals = null; + protected final byte[] bases; + protected final double[] quals; private GenomeLoc genomeLocation = null; private boolean isReference = false; @@ -69,6 +69,11 @@ public class Haplotype { this.isReference = isRef; } + @Override + public boolean equals( Object h ) { + return h instanceof Haplotype && Arrays.equals(bases, ((Haplotype) h).bases); + } + public double getQualitySum() { double s = 0; for (int k=0; k < bases.length; k++) { @@ -88,7 +93,7 @@ public class Haplotype { public double[] getQuals() { return quals; } - public byte[] getBasesAsBytes() { + public byte[] getBases() { return bases; } @@ -100,7 +105,6 @@ public class Haplotype { return genomeLocation.getStop(); } - public boolean isReference() { return isReference; } diff --git a/public/java/src/org/broadinstitute/sting/utils/SimpleTimer.java b/public/java/src/org/broadinstitute/sting/utils/SimpleTimer.java index a5ac10250..15d34a348 100644 --- a/public/java/src/org/broadinstitute/sting/utils/SimpleTimer.java +++ b/public/java/src/org/broadinstitute/sting/utils/SimpleTimer.java @@ -1,10 +1,5 @@ package org.broadinstitute.sting.utils; -import com.google.java.contract.Ensures; -import com.google.java.contract.Invariant; -import com.google.java.contract.Requires; - -import java.io.PrintStream; /** * A useful simple system for timing code. This code is not thread safe! @@ -13,11 +8,6 @@ import java.io.PrintStream; * Date: Dec 10, 2010 * Time: 9:07:44 AM */ -@Invariant({ - "elapsed >= 0", - "startTime >= 0", - "name != null", - "! running || startTime > 0"}) public class SimpleTimer { final private String name; private long elapsed = 0l; @@ -27,7 +17,6 @@ public class SimpleTimer { /** * Creates an anonymous simple timer */ - @Ensures("name != null && name.equals(\"Anonymous\")") public SimpleTimer() { this("Anonymous"); } @@ -36,8 +25,6 @@ public class SimpleTimer { * Creates a simple timer named name * @param name of the timer, must not be null */ - @Requires("name != null") - @Ensures("this.name != null && this.name.equals(name)") public SimpleTimer(String name) { this.name = name; } @@ -45,7 +32,6 @@ public class SimpleTimer { /** * @return the name associated with this timer */ - @Ensures("result != null") public synchronized String getName() { return name; } @@ -56,8 +42,6 @@ public class SimpleTimer { * * @return this object, for programming convenience */ - @Requires("running == false") - @Ensures({"result != null", "elapsed == 0l"}) public synchronized SimpleTimer start() { elapsed = 0l; restart(); @@ -71,8 +55,6 @@ public class SimpleTimer { * * @return this object, for programming convenience */ - @Requires("running == false") - @Ensures("result != null") public synchronized SimpleTimer restart() { running = true; startTime = currentTime(); @@ -99,8 +81,6 @@ public class SimpleTimer { * * @return this object, for programming convenience */ - @Requires("running == true") - @Ensures({"result != null", "elapsed >= old(elapsed)", "running == false"}) public synchronized SimpleTimer stop() { running = false; elapsed += currentTime() - startTime; @@ -113,9 +93,6 @@ public class SimpleTimer { * * @return this time, in seconds */ - @Ensures({ - "result >= (elapsed/1000.0)", - "result >= 0"}) public synchronized double getElapsedTime() { return (running ? (currentTime() - startTime + elapsed) : elapsed) / 1000.0; } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 8e887c32a..189f643d4 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -124,6 +124,14 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { executeTest("using expression", spec); } + @Test + public void testUsingExpressionWithID() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString() + " --resource:foo " + validationDataLocation + "targetAnnotations.vcf -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.ID -L " + validationDataLocation + "vcfexample3empty.vcf", 1, + Arrays.asList("4a6f0675242f685e9072c1da5ad9e715")); + executeTest("using expression with ID", spec); + } + @Test public void testTabixAnnotations() { final String MD5 = "13269d5a2e16f06fd755cc0fb9271acf"; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java deleted file mode 100644 index cf6b4e581..000000000 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java +++ /dev/null @@ -1,28 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.phasing; - -import org.broadinstitute.sting.WalkerTest; -import org.testng.annotations.Test; - -import java.util.Arrays; - -public class MergeAndMatchHaplotypesIntegrationTest extends WalkerTest { - private static String mergeAndMatchHaplotypesTestDataRoot = validationDataLocation + "/MergeAndMatchHaplotypes"; - private static String fundamentalTestPBTVCF = mergeAndMatchHaplotypesTestDataRoot + "/" + "FundamentalsTest.pbt.vcf"; - private static String fundamentalTestRBPVCF = mergeAndMatchHaplotypesTestDataRoot + "/" + "FundamentalsTest.pbt.rbp.vcf"; - - @Test - public void testBasicFunctionality() { - WalkerTestSpec spec = new WalkerTestSpec( - buildCommandLine( - "-T MergeAndMatchHaplotypes", - "-R " + b37KGReference, - "--pbt " + fundamentalTestPBTVCF, - "--rbp " + fundamentalTestRBPVCF, - "-o %s" - ), - 1, - Arrays.asList("") - ); - executeTest("testBasicMergeAndMatchHaplotypesFunctionality", spec); - } -} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index cd2493dde..cbfe0ceab 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -9,7 +9,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval"; private static String fundamentalTestVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf"; private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.vcf"; - private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.HG00625.vcf"; + private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.NA12045.vcf"; private static String cmdRoot = "-T VariantEval" + " -R " + b36KGReference; @@ -359,7 +359,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testPerSampleAndSubsettedSampleHaveSameResults() { - String md5 = "b0565ac61b2860248e4abd478a177b5e"; + String md5 = "7425ca5c439afd7bb33ed5cfea02c2b3"; WalkerTestSpec spec = new WalkerTestSpec( buildCommandLine( @@ -369,7 +369,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "--eval " + fundamentalTestSNPsVCF, "-noEV", "-EV CompOverlap", - "-sn HG00625", + "-sn NA12045", "-noST", "-L " + fundamentalTestSNPsVCF, "-o %s"