Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
bb448b27d2
|
|
@ -131,7 +131,7 @@ public class AlignmentContextUtils {
|
|||
}
|
||||
}
|
||||
|
||||
public static Map<String, AlignmentContext> splitContextBySampleName(ReadBackedPileup pileup, String assumedSingleSample) {
|
||||
public static Map<String, AlignmentContext> splitContextBySampleName(ReadBackedPileup pileup) {
|
||||
return splitContextBySampleName(new AlignmentContext(pileup.getLocation(), pileup));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -14,7 +14,8 @@ import java.util.List;
|
|||
|
||||
|
||||
/**
|
||||
* The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for base qualities (ref bases vs. bases of the alternate allele)
|
||||
* The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for base qualities (ref bases vs. bases of the alternate allele).
|
||||
* Note that the base quality rank sum test can not be calculated for homozygous sites.
|
||||
*/
|
||||
public class BaseQualityRankSumTest extends RankSumTest {
|
||||
public List<String> getKeyNames() { return Arrays.asList("BaseQRankSum"); }
|
||||
|
|
|
|||
|
|
@ -46,7 +46,8 @@ import java.util.*;
|
|||
/**
|
||||
* Phred-scaled p-value using Fisher's Exact Test to detect strand bias (the variation
|
||||
* being seen on only the forward or only the reverse strand) in the reads? More bias is
|
||||
* indicative of false positive calls.
|
||||
* indicative of false positive calls. Note that the fisher strand test may not be
|
||||
* calculated for certain complex indel cases or for multi-allelic sites.
|
||||
*/
|
||||
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
private static final String FS = "FS";
|
||||
|
|
|
|||
|
|
@ -52,6 +52,8 @@ import java.util.*;
|
|||
/**
|
||||
* Consistency of the site with two (and only two) segregating haplotypes. Higher scores
|
||||
* are indicative of regions with bad alignments, often leading to artifactual SNP and indel calls.
|
||||
* Note that the Haplotype Score is only calculated for sites with read coverage; also, for SNPs, the
|
||||
* site must be bi-allelic.
|
||||
*/
|
||||
public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
private final static boolean DEBUG = false;
|
||||
|
|
@ -180,12 +182,12 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
final Haplotype haplotype1 = consensusHaplotypeQueue.poll();
|
||||
|
||||
List<Haplotype>hlist = new ArrayList<Haplotype>();
|
||||
hlist.add(new Haplotype(haplotype1.getBasesAsBytes(), 60));
|
||||
hlist.add(new Haplotype(haplotype1.getBases(), 60));
|
||||
|
||||
for (int k=1; k < haplotypesToCompute; k++) {
|
||||
Haplotype haplotype2 = consensusHaplotypeQueue.poll();
|
||||
if(haplotype2 == null ) { haplotype2 = haplotype1; } // Sometimes only the reference haplotype can be found
|
||||
hlist.add(new Haplotype(haplotype2.getBasesAsBytes(), 20));
|
||||
hlist.add(new Haplotype(haplotype2.getBases(), 20));
|
||||
}
|
||||
return hlist;
|
||||
} else
|
||||
|
|
@ -229,8 +231,8 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
}
|
||||
|
||||
private Haplotype getConsensusHaplotype(final Haplotype haplotypeA, final Haplotype haplotypeB) {
|
||||
final byte[] a = haplotypeA.getBasesAsBytes();
|
||||
final byte[] b = haplotypeB.getBasesAsBytes();
|
||||
final byte[] a = haplotypeA.getBases();
|
||||
final byte[] b = haplotypeB.getBases();
|
||||
|
||||
if (a.length != b.length) {
|
||||
throw new ReviewedStingException("Haplotypes a and b must be of same length");
|
||||
|
|
@ -313,7 +315,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
// actually be a miscall in a matching direction, which would happen at a e / 3 rate. If b != c, then
|
||||
// the chance that it is actually a mismatch is 1 - e, since any of the other 3 options would be a mismatch.
|
||||
// so the probability-weighted mismatch rate is sum_i ( matched ? e_i / 3 : 1 - e_i ) for i = 1 ... n
|
||||
final byte[] haplotypeBases = haplotype.getBasesAsBytes();
|
||||
final byte[] haplotypeBases = haplotype.getBases();
|
||||
final SAMRecord read = p.getRead();
|
||||
byte[] readBases = read.getReadBases();
|
||||
|
||||
|
|
|
|||
|
|
@ -23,7 +23,8 @@ import java.util.Map;
|
|||
*
|
||||
* A continuous generalization of the Hardy-Weinberg test for disequilibrium that works
|
||||
* well with limited coverage per sample. See the 1000 Genomes Phase I release for
|
||||
* more information.
|
||||
* more information. Note that the Inbreeding Coefficient will not be calculated for files
|
||||
* with fewer than a minimum (generally 10) number of samples.
|
||||
*/
|
||||
public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ import java.util.List;
|
|||
|
||||
/**
|
||||
* The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for mapping qualities (reads with ref bases vs. those with the alternate allele)
|
||||
* Note that the mapping quality rank sum test can not be calculated for homozygous sites.
|
||||
*/
|
||||
public class MappingQualityRankSumTest extends RankSumTest {
|
||||
|
||||
|
|
|
|||
|
|
@ -19,7 +19,8 @@ import java.util.Map;
|
|||
/**
|
||||
* Variant confidence (given as (AB+BB)/AA from the PLs) / unfiltered depth.
|
||||
*
|
||||
* Low scores are indicative of false positive calls and artifacts.
|
||||
* Low scores are indicative of false positive calls and artifacts. Note that QualByDepth requires sequencing
|
||||
* reads associated with the samples with polymorphic genotypes.
|
||||
*/
|
||||
public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ import java.util.List;
|
|||
|
||||
/**
|
||||
* The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for the distance from the end of the read for reads with the alternate allele; if the alternate allele is only seen near the ends of reads this is indicative of error).
|
||||
* Note that the read position rank sum test can not be calculated for homozygous sites.
|
||||
*/
|
||||
public class ReadPosRankSumTest extends RankSumTest {
|
||||
|
||||
|
|
|
|||
|
|
@ -164,10 +164,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
@Argument(fullName="list", shortName="ls", doc="List the available annotations and exit")
|
||||
protected Boolean LIST = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "assume_single_sample_reads", shortName = "single_sample", doc = "The single sample that we should assume is represented in the input bam (and therefore associate with all reads regardless of whether they have read groups)", required = false)
|
||||
protected String ASSUME_SINGLE_SAMPLE = null;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
|
||||
protected boolean indelsOnly = false;
|
||||
|
|
@ -213,11 +209,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
List<String> rodName = Arrays.asList(variantCollection.variants.getName());
|
||||
Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName);
|
||||
|
||||
// if there are no valid samples, warn the user
|
||||
if ( samples.size() == 0 ) {
|
||||
logger.warn("There are no samples input at all; use the --sampleName argument to specify one if desired.");
|
||||
}
|
||||
|
||||
if ( USE_ALL_ANNOTATIONS )
|
||||
engine = new VariantAnnotatorEngine(annotationsToExclude, this, getToolkit());
|
||||
else
|
||||
|
|
@ -301,9 +292,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
Map<String, AlignmentContext> stratifiedContexts;
|
||||
if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 ) {
|
||||
if ( ! context.hasExtendedEventPileup() ) {
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getBasePileup(), ASSUME_SINGLE_SAMPLE);
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getBasePileup());
|
||||
} else {
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getExtendedEventPileup(), ASSUME_SINGLE_SAMPLE);
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getExtendedEventPileup());
|
||||
}
|
||||
if ( stratifiedContexts != null ) {
|
||||
annotatedVCs = new ArrayList<VariantContext>(VCs.size());
|
||||
|
|
|
|||
|
|
@ -39,7 +39,6 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|||
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -71,12 +70,7 @@ public class UGCalcLikelihoods extends LocusWalker<VariantCallContext, Integer>
|
|||
|
||||
public void initialize() {
|
||||
// get all of the unique sample names
|
||||
// if we're supposed to assume a single sample, do so
|
||||
Set<String> samples = new TreeSet<String>();
|
||||
if ( UAC.ASSUME_SINGLE_SAMPLE != null )
|
||||
samples.add(UAC.ASSUME_SINGLE_SAMPLE);
|
||||
else
|
||||
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
|
||||
Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
|
||||
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples);
|
||||
|
||||
|
|
|
|||
|
|
@ -96,11 +96,6 @@ public class UnifiedArgumentCollection {
|
|||
@Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when in GENOTYPE_MODE = GENOTYPE_GIVEN_ALLELES", required=false)
|
||||
public RodBinding<VariantContext> alleles;
|
||||
|
||||
// control the error modes
|
||||
@Hidden
|
||||
@Argument(fullName = "assume_single_sample_reads", shortName = "single_sample", doc = "The single sample that we should assume is represented in the input bam (and therefore associate with all reads regardless of whether they have read groups)", required = false)
|
||||
public String ASSUME_SINGLE_SAMPLE = null;
|
||||
|
||||
/**
|
||||
* The minimum confidence needed in a given base for it to be used in variant calling. Note that the base quality of a base
|
||||
* is capped by the mapping quality so that bases on reads with low mapping quality may get filtered out depending on this value.
|
||||
|
|
@ -170,7 +165,6 @@ public class UnifiedArgumentCollection {
|
|||
uac.GenotypingMode = GenotypingMode;
|
||||
uac.OutputMode = OutputMode;
|
||||
uac.COMPUTE_SLOD = COMPUTE_SLOD;
|
||||
uac.ASSUME_SINGLE_SAMPLE = ASSUME_SINGLE_SAMPLE;
|
||||
uac.STANDARD_CONFIDENCE_FOR_CALLING = STANDARD_CONFIDENCE_FOR_CALLING;
|
||||
uac.STANDARD_CONFIDENCE_FOR_EMITTING = STANDARD_CONFIDENCE_FOR_EMITTING;
|
||||
uac.MIN_BASE_QUALTY_SCORE = MIN_BASE_QUALTY_SCORE;
|
||||
|
|
|
|||
|
|
@ -206,12 +206,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
**/
|
||||
public void initialize() {
|
||||
// get all of the unique sample names
|
||||
// if we're supposed to assume a single sample, do so
|
||||
Set<String> samples = new TreeSet<String>();
|
||||
if ( UAC.ASSUME_SINGLE_SAMPLE != null )
|
||||
samples.add(UAC.ASSUME_SINGLE_SAMPLE);
|
||||
else
|
||||
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
|
||||
Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
|
||||
|
||||
// initialize the verbose writer
|
||||
if ( verboseWriter != null )
|
||||
|
|
|
|||
|
|
@ -106,12 +106,7 @@ public class UnifiedGenotyperEngine {
|
|||
// ---------------------------------------------------------------------------------------------------------
|
||||
@Requires({"toolkit != null", "UAC != null"})
|
||||
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
|
||||
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null,
|
||||
// get the number of samples
|
||||
// if we're supposed to assume a single sample, do so
|
||||
UAC.ASSUME_SINGLE_SAMPLE != null ?
|
||||
new TreeSet<String>(Arrays.asList(UAC.ASSUME_SINGLE_SAMPLE)) :
|
||||
SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()));
|
||||
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()));
|
||||
}
|
||||
|
||||
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0"})
|
||||
|
|
@ -253,7 +248,7 @@ public class UnifiedGenotyperEngine {
|
|||
pileup = rawContext.getExtendedEventPileup();
|
||||
else if (rawContext.hasBasePileup())
|
||||
pileup = rawContext.getBasePileup();
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE);
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
|
||||
|
||||
vc = annotationEngine.annotateContext(tracker, ref, stratifiedContexts, vc);
|
||||
}
|
||||
|
|
@ -435,7 +430,7 @@ public class UnifiedGenotyperEngine {
|
|||
pileup = rawContext.getExtendedEventPileup();
|
||||
else if (rawContext.hasBasePileup())
|
||||
pileup = rawContext.getBasePileup();
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE);
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
|
||||
|
||||
vcCall = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall);
|
||||
}
|
||||
|
|
@ -569,7 +564,7 @@ public class UnifiedGenotyperEngine {
|
|||
return null;
|
||||
|
||||
// stratify the AlignmentContext and cut by sample
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE);
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
|
||||
|
||||
} else {
|
||||
|
||||
|
|
@ -586,12 +581,12 @@ public class UnifiedGenotyperEngine {
|
|||
return null;
|
||||
|
||||
// stratify the AlignmentContext and cut by sample
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE);
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
|
||||
}
|
||||
} else if ( model == GenotypeLikelihoodsCalculationModel.Model.SNP ) {
|
||||
|
||||
// stratify the AlignmentContext and cut by sample
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup(), UAC.ASSUME_SINGLE_SAMPLE);
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup());
|
||||
|
||||
if( !(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) ) {
|
||||
int numDeletions = 0;
|
||||
|
|
|
|||
|
|
@ -205,7 +205,7 @@ public class HaplotypeIndelErrorModel {
|
|||
|
||||
byte haplotypeBase;
|
||||
if (haplotypeIndex < RIGHT_ALIGN_INDEX)
|
||||
haplotypeBase = haplotype.getBasesAsBytes()[haplotypeIndex];
|
||||
haplotypeBase = haplotype.getBases()[haplotypeIndex];
|
||||
else
|
||||
haplotypeBase = (byte)0; // dummy
|
||||
|
||||
|
|
@ -217,7 +217,7 @@ public class HaplotypeIndelErrorModel {
|
|||
if (readQual > 3)
|
||||
pRead += pBaseRead;
|
||||
haplotypeIndex++;
|
||||
if (haplotypeIndex >= haplotype.getBasesAsBytes().length)
|
||||
if (haplotypeIndex >= haplotype.getBases().length)
|
||||
haplotypeIndex = RIGHT_ALIGN_INDEX;
|
||||
//System.out.format("H:%c R:%c RQ:%d HI:%d %4.5f %4.5f\n", haplotypeBase, readBase, (int)readQual, haplotypeIndex, pBaseRead, pRead);
|
||||
}
|
||||
|
|
@ -227,8 +227,8 @@ public class HaplotypeIndelErrorModel {
|
|||
System.out.println(read.getReadName());
|
||||
System.out.print("Haplotype:");
|
||||
|
||||
for (int k=0; k <haplotype.getBasesAsBytes().length; k++) {
|
||||
System.out.format("%c ", haplotype.getBasesAsBytes()[k]);
|
||||
for (int k=0; k <haplotype.getBases().length; k++) {
|
||||
System.out.format("%c ", haplotype.getBases()[k]);
|
||||
}
|
||||
System.out.println();
|
||||
|
||||
|
|
@ -246,8 +246,8 @@ public class HaplotypeIndelErrorModel {
|
|||
|
||||
System.out.println("Haplotype:");
|
||||
|
||||
for (int k=initialIndexInHaplotype; k <haplotype.getBasesAsBytes().length; k++) {
|
||||
System.out.format("%c ", haplotype.getBasesAsBytes()[k]);
|
||||
for (int k=initialIndexInHaplotype; k <haplotype.getBases().length; k++) {
|
||||
System.out.format("%c ", haplotype.getBases()[k]);
|
||||
}
|
||||
System.out.println();
|
||||
|
||||
|
|
@ -275,7 +275,7 @@ public class HaplotypeIndelErrorModel {
|
|||
|
||||
byte haplotypeBase;
|
||||
if (indX > LEFT_ALIGN_INDEX && indX < RIGHT_ALIGN_INDEX)
|
||||
haplotypeBase = haplotype.getBasesAsBytes()[indX-1];
|
||||
haplotypeBase = haplotype.getBases()[indX-1];
|
||||
else
|
||||
haplotypeBase = readBase;
|
||||
|
||||
|
|
@ -296,8 +296,8 @@ public class HaplotypeIndelErrorModel {
|
|||
System.out.println(read.getReadName());
|
||||
System.out.print("Haplotype:");
|
||||
|
||||
for (int k=0; k <haplotype.getBasesAsBytes().length; k++) {
|
||||
System.out.format("%c ", haplotype.getBasesAsBytes()[k]);
|
||||
for (int k=0; k <haplotype.getBases().length; k++) {
|
||||
System.out.format("%c ", haplotype.getBases()[k]);
|
||||
}
|
||||
System.out.println();
|
||||
|
||||
|
|
|
|||
|
|
@ -817,7 +817,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
// For now, we will just arbitrarily add 10 to the mapping quality. [EB, 6/7/2010].
|
||||
// TODO -- we need a better solution here
|
||||
GATKSAMRecord read = aRead.getRead();
|
||||
read.setMappingQuality(Math.min(aRead.getRead().getMappingQuality() + 10, 254));
|
||||
if ( read.getMappingQuality() != 255 ) // 255 == Unknown, so don't modify it
|
||||
read.setMappingQuality(Math.min(aRead.getRead().getMappingQuality() + 10, 254));
|
||||
|
||||
// before we fix the attribute tags we first need to make sure we have enough of the reference sequence
|
||||
int neededBasesToLeft = leftmostIndex - read.getAlignmentStart();
|
||||
|
|
|
|||
|
|
@ -381,7 +381,7 @@ public class PairHMMIndelErrorModel {
|
|||
// todo -- refactor into separate function
|
||||
for (Allele a: haplotypeMap.keySet()) {
|
||||
Haplotype haplotype = haplotypeMap.get(a);
|
||||
byte[] haplotypeBases = haplotype.getBasesAsBytes();
|
||||
byte[] haplotypeBases = haplotype.getBases();
|
||||
double[] contextLogGapOpenProbabilities = new double[haplotypeBases.length];
|
||||
double[] contextLogGapContinuationProbabilities = new double[haplotypeBases.length];
|
||||
|
||||
|
|
@ -555,7 +555,7 @@ public class PairHMMIndelErrorModel {
|
|||
long indStart = start - haplotype.getStartPosition();
|
||||
long indStop = stop - haplotype.getStartPosition();
|
||||
|
||||
byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBasesAsBytes(),
|
||||
byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBases(),
|
||||
(int)indStart, (int)indStop);
|
||||
|
||||
double readLikelihood;
|
||||
|
|
|
|||
|
|
@ -1,118 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.phasing;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Merges read-back-phased and phase-by-transmission files.
|
||||
*/
|
||||
public class MergeAndMatchHaplotypes extends RodWalker<Integer, Integer> {
|
||||
@Output
|
||||
protected VCFWriter vcfWriter = null;
|
||||
|
||||
@Input(fullName="pbt", shortName = "pbt", doc="Input VCF truth file", required=true)
|
||||
public RodBinding<VariantContext> pbtTrack;
|
||||
|
||||
@Input(fullName="rbp", shortName = "rbp", doc="Input VCF truth file", required=true)
|
||||
public RodBinding<VariantContext> rbpTrack;
|
||||
|
||||
private Map<String, Genotype> pbtCache = new HashMap<String, Genotype>();
|
||||
private Map<String, Genotype> rbpCache = new HashMap<String, Genotype>();
|
||||
|
||||
private final String SOURCE_NAME = "MergeReadBackedAndTransmissionPhasedVariants";
|
||||
|
||||
public void initialize() {
|
||||
ArrayList<String> rodNames = new ArrayList<String>();
|
||||
rodNames.add(pbtTrack.getName());
|
||||
|
||||
Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
|
||||
Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
|
||||
Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>();
|
||||
headerLines.addAll(VCFUtils.getHeaderFields(this.getToolkit()));
|
||||
|
||||
vcfWriter.writeHeader(new VCFHeader(headerLines, vcfSamples));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if (tracker != null) {
|
||||
Collection<VariantContext> pbts = tracker.getValues(pbtTrack, ref.getLocus());
|
||||
Collection<VariantContext> rbps = tracker.getValues(rbpTrack, ref.getLocus());
|
||||
|
||||
VariantContext pbt = pbts.iterator().hasNext() ? pbts.iterator().next() : null;
|
||||
VariantContext rbp = rbps.iterator().hasNext() ? rbps.iterator().next() : null;
|
||||
|
||||
if (pbt != null && rbp != null) {
|
||||
Map<String, Genotype> genotypes = pbt.getGenotypes();
|
||||
|
||||
if (!rbp.isFiltered()) {
|
||||
for (String sample : rbp.getSampleNames()) {
|
||||
Genotype rbpg = rbp.getGenotype(sample);
|
||||
Genotype pbtg = pbt.getGenotype(sample);
|
||||
|
||||
// Propagate read-backed phasing information to genotypes unphased by transmission
|
||||
//if (!pbtg.isPhased() && rbpCache.containsKey(sample)) {
|
||||
if (!pbtg.isPhased() && rbpg.isPhased() && rbpCache.containsKey(sample)) {
|
||||
boolean orientationMatches = rbpCache.get(sample).sameGenotype(pbtCache.get(sample), false);
|
||||
|
||||
if (orientationMatches) {
|
||||
pbtg = rbpg;
|
||||
} else {
|
||||
List<Allele> fwdAlleles = rbpg.getAlleles();
|
||||
List<Allele> revAlleles = new ArrayList<Allele>();
|
||||
|
||||
for (int i = fwdAlleles.size() - 1; i >= 0; i--) {
|
||||
revAlleles.add(fwdAlleles.get(i));
|
||||
}
|
||||
|
||||
pbtg = new Genotype(sample, revAlleles, rbpg.getNegLog10PError(), rbpg.getFilters(), rbpg.getAttributes(), rbpg.isPhased());
|
||||
}
|
||||
}
|
||||
|
||||
genotypes.put(sample, pbtg);
|
||||
|
||||
// Update the cache
|
||||
if (/*rbpg.isPhased() &&*/ rbpg.isHet()) {
|
||||
rbpCache.put(sample, rbpg);
|
||||
pbtCache.put(sample, pbtg);
|
||||
} else if (!rbpg.isPhased()) {
|
||||
rbpCache.remove(sample);
|
||||
pbtCache.remove(sample);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
VariantContext newvc = new VariantContext(SOURCE_NAME, pbt.getChr(), pbt.getStart(), pbt.getStart(), pbt.getAlleles(), genotypes, pbt.getNegLog10PError(), pbt.getFilters(), pbt.getAttributes());
|
||||
vcfWriter.add(newvc);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduceInit() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
@ -33,8 +33,8 @@ import java.util.LinkedHashMap;
|
|||
import java.util.List;
|
||||
|
||||
public class Haplotype {
|
||||
protected byte[] bases = null;
|
||||
protected double[] quals = null;
|
||||
protected final byte[] bases;
|
||||
protected final double[] quals;
|
||||
private GenomeLoc genomeLocation = null;
|
||||
private boolean isReference = false;
|
||||
|
||||
|
|
@ -69,6 +69,11 @@ public class Haplotype {
|
|||
this.isReference = isRef;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals( Object h ) {
|
||||
return h instanceof Haplotype && Arrays.equals(bases, ((Haplotype) h).bases);
|
||||
}
|
||||
|
||||
public double getQualitySum() {
|
||||
double s = 0;
|
||||
for (int k=0; k < bases.length; k++) {
|
||||
|
|
@ -88,7 +93,7 @@ public class Haplotype {
|
|||
public double[] getQuals() {
|
||||
return quals;
|
||||
}
|
||||
public byte[] getBasesAsBytes() {
|
||||
public byte[] getBases() {
|
||||
return bases;
|
||||
}
|
||||
|
||||
|
|
@ -100,7 +105,6 @@ public class Haplotype {
|
|||
return genomeLocation.getStop();
|
||||
}
|
||||
|
||||
|
||||
public boolean isReference() {
|
||||
return isReference;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,10 +1,5 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import com.google.java.contract.Requires;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
||||
/**
|
||||
* A useful simple system for timing code. This code is not thread safe!
|
||||
|
|
@ -13,11 +8,6 @@ import java.io.PrintStream;
|
|||
* Date: Dec 10, 2010
|
||||
* Time: 9:07:44 AM
|
||||
*/
|
||||
@Invariant({
|
||||
"elapsed >= 0",
|
||||
"startTime >= 0",
|
||||
"name != null",
|
||||
"! running || startTime > 0"})
|
||||
public class SimpleTimer {
|
||||
final private String name;
|
||||
private long elapsed = 0l;
|
||||
|
|
@ -27,7 +17,6 @@ public class SimpleTimer {
|
|||
/**
|
||||
* Creates an anonymous simple timer
|
||||
*/
|
||||
@Ensures("name != null && name.equals(\"Anonymous\")")
|
||||
public SimpleTimer() {
|
||||
this("Anonymous");
|
||||
}
|
||||
|
|
@ -36,8 +25,6 @@ public class SimpleTimer {
|
|||
* Creates a simple timer named name
|
||||
* @param name of the timer, must not be null
|
||||
*/
|
||||
@Requires("name != null")
|
||||
@Ensures("this.name != null && this.name.equals(name)")
|
||||
public SimpleTimer(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
|
@ -45,7 +32,6 @@ public class SimpleTimer {
|
|||
/**
|
||||
* @return the name associated with this timer
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public synchronized String getName() {
|
||||
return name;
|
||||
}
|
||||
|
|
@ -56,8 +42,6 @@ public class SimpleTimer {
|
|||
*
|
||||
* @return this object, for programming convenience
|
||||
*/
|
||||
@Requires("running == false")
|
||||
@Ensures({"result != null", "elapsed == 0l"})
|
||||
public synchronized SimpleTimer start() {
|
||||
elapsed = 0l;
|
||||
restart();
|
||||
|
|
@ -71,8 +55,6 @@ public class SimpleTimer {
|
|||
*
|
||||
* @return this object, for programming convenience
|
||||
*/
|
||||
@Requires("running == false")
|
||||
@Ensures("result != null")
|
||||
public synchronized SimpleTimer restart() {
|
||||
running = true;
|
||||
startTime = currentTime();
|
||||
|
|
@ -99,8 +81,6 @@ public class SimpleTimer {
|
|||
*
|
||||
* @return this object, for programming convenience
|
||||
*/
|
||||
@Requires("running == true")
|
||||
@Ensures({"result != null", "elapsed >= old(elapsed)", "running == false"})
|
||||
public synchronized SimpleTimer stop() {
|
||||
running = false;
|
||||
elapsed += currentTime() - startTime;
|
||||
|
|
@ -113,9 +93,6 @@ public class SimpleTimer {
|
|||
*
|
||||
* @return this time, in seconds
|
||||
*/
|
||||
@Ensures({
|
||||
"result >= (elapsed/1000.0)",
|
||||
"result >= 0"})
|
||||
public synchronized double getElapsedTime() {
|
||||
return (running ? (currentTime() - startTime + elapsed) : elapsed) / 1000.0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -124,6 +124,14 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
executeTest("using expression", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUsingExpressionWithID() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --resource:foo " + validationDataLocation + "targetAnnotations.vcf -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.ID -L " + validationDataLocation + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("4a6f0675242f685e9072c1da5ad9e715"));
|
||||
executeTest("using expression with ID", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTabixAnnotations() {
|
||||
final String MD5 = "13269d5a2e16f06fd755cc0fb9271acf";
|
||||
|
|
|
|||
|
|
@ -1,28 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.phasing;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class MergeAndMatchHaplotypesIntegrationTest extends WalkerTest {
|
||||
private static String mergeAndMatchHaplotypesTestDataRoot = validationDataLocation + "/MergeAndMatchHaplotypes";
|
||||
private static String fundamentalTestPBTVCF = mergeAndMatchHaplotypesTestDataRoot + "/" + "FundamentalsTest.pbt.vcf";
|
||||
private static String fundamentalTestRBPVCF = mergeAndMatchHaplotypesTestDataRoot + "/" + "FundamentalsTest.pbt.rbp.vcf";
|
||||
|
||||
@Test
|
||||
public void testBasicFunctionality() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(
|
||||
"-T MergeAndMatchHaplotypes",
|
||||
"-R " + b37KGReference,
|
||||
"--pbt " + fundamentalTestPBTVCF,
|
||||
"--rbp " + fundamentalTestRBPVCF,
|
||||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("")
|
||||
);
|
||||
executeTest("testBasicMergeAndMatchHaplotypesFunctionality", spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -9,7 +9,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval";
|
||||
private static String fundamentalTestVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf";
|
||||
private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.vcf";
|
||||
private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.HG00625.vcf";
|
||||
private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.NA12045.vcf";
|
||||
|
||||
private static String cmdRoot = "-T VariantEval" +
|
||||
" -R " + b36KGReference;
|
||||
|
|
@ -359,7 +359,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testPerSampleAndSubsettedSampleHaveSameResults() {
|
||||
String md5 = "b0565ac61b2860248e4abd478a177b5e";
|
||||
String md5 = "7425ca5c439afd7bb33ed5cfea02c2b3";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(
|
||||
|
|
@ -369,7 +369,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"--eval " + fundamentalTestSNPsVCF,
|
||||
"-noEV",
|
||||
"-EV CompOverlap",
|
||||
"-sn HG00625",
|
||||
"-sn NA12045",
|
||||
"-noST",
|
||||
"-L " + fundamentalTestSNPsVCF,
|
||||
"-o %s"
|
||||
|
|
|
|||
Loading…
Reference in New Issue