Added GATKVCFConstants and GATKVCFHeaderLines to consolidate the GATK-specific VCF annotations
* Removed unused annotations (CCC and HWP)
* Renamed one of the two GC annotations to "IGC" (for Interval GC)
* Revved picard & htsjdk (GATK constants are now removed from htsjdk)
* PT 82046038
This commit is contained in:
parent
cf4ac4a388
commit
b73e9d506a
|
|
@ -54,10 +54,7 @@ package org.broadinstitute.gatk.tools.walkers.annotator;
|
|||
import htsjdk.variant.variantcontext.Genotype;
|
||||
import htsjdk.variant.variantcontext.GenotypeBuilder;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import htsjdk.variant.vcf.VCFConstants;
|
||||
import htsjdk.variant.vcf.VCFFormatHeaderLine;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineCount;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
|
|
@ -68,6 +65,8 @@ import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodCalcula
|
|||
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodCalculators;
|
||||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
|
|
@ -82,11 +81,11 @@ import java.util.List;
|
|||
@SuppressWarnings("unused")
|
||||
public final class AlleleCountBySample extends GenotypeAnnotation {
|
||||
|
||||
private final static List<String> keyNames = Collections.unmodifiableList(Arrays.asList(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY,VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY));
|
||||
private final static List<String> keyNames = Collections.unmodifiableList(Arrays.asList(GATKVCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY,GATKVCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY));
|
||||
|
||||
private final static List<VCFFormatHeaderLine> descriptors = Collections.unmodifiableList(Arrays.asList(
|
||||
new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the alternate allele count, in the same order as listed, for each individual sample"),
|
||||
new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the alternate allele fraction, in the same order as listed, for each individual sample")
|
||||
GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY),
|
||||
GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY)
|
||||
));
|
||||
|
||||
@Override
|
||||
|
|
@ -120,8 +119,8 @@ public final class AlleleCountBySample extends GenotypeAnnotation {
|
|||
AC[alleleIndex - 1] = alleleCount;
|
||||
AF[alleleIndex - 1] = ((double) alleleCount) / (double) ploidy;
|
||||
}
|
||||
gb.attribute(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, AC);
|
||||
gb.attribute(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, AF);
|
||||
gb.attribute(GATKVCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, AC);
|
||||
gb.attribute(GATKVCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, AF);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -54,9 +54,10 @@ package org.broadinstitute.gatk.tools.walkers.annotator;
|
|||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.gatk.utils.sam.ReadUtils;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.gatk.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -75,10 +76,10 @@ import java.util.*;
|
|||
*/
|
||||
public class BaseQualityRankSumTest extends RankSumTest implements StandardAnnotation {
|
||||
@Override
|
||||
public List<String> getKeyNames() { return Arrays.asList("BaseQRankSum"); }
|
||||
public List<String> getKeyNames() { return Arrays.asList(GATKVCFConstants.BASE_QUAL_RANK_SUM_KEY); }
|
||||
|
||||
@Override
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("BaseQRankSum", 1, VCFHeaderLineType.Float, "Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities")); }
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(GATKVCFHeaderLines.getInfoLine(getKeyNames().get(0))); }
|
||||
|
||||
@Override
|
||||
protected Double getElementForRead(final GATKSAMRecord read, final int refLoc) {
|
||||
|
|
|
|||
|
|
@ -51,11 +51,11 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.gatk.utils.pileup.PileupElement;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.gatk.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -73,10 +73,10 @@ import java.util.*;
|
|||
*/
|
||||
public class ClippingRankSumTest extends RankSumTest {
|
||||
@Override
|
||||
public List<String> getKeyNames() { return Arrays.asList("ClippingRankSum"); }
|
||||
public List<String> getKeyNames() { return Arrays.asList(GATKVCFConstants.CLIPPING_RANK_SUM_KEY); }
|
||||
|
||||
@Override
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("ClippingRankSum", 1, VCFHeaderLineType.Float, "Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases")); }
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(GATKVCFHeaderLines.getInfoLine(getKeyNames().get(0))); }
|
||||
|
||||
@Override
|
||||
protected Double getElementForRead(final GATKSAMRecord read, final int refLoc) {
|
||||
|
|
|
|||
|
|
@ -59,9 +59,10 @@ import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnota
|
|||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.gatk.utils.QualityUtils;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -91,7 +92,6 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
|
|||
private final static boolean ENABLE_DEBUGGING = false;
|
||||
private final static Logger logger = Logger.getLogger(FisherStrand.class);
|
||||
|
||||
private static final String FS = "FS";
|
||||
private static final double MIN_PVALUE = 1E-320;
|
||||
private static final int MIN_QUAL_FOR_FILTERED_TEST = 17;
|
||||
private static final int MIN_COUNT = ARRAY_DIM;
|
||||
|
|
@ -151,17 +151,17 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
|
|||
*/
|
||||
protected Map<String, Object> annotationForOneTable(final double pValue) {
|
||||
final Object value = String.format("%.3f", QualityUtils.phredScaleErrorRate(Math.max(pValue, MIN_PVALUE))); // prevent INFINITYs
|
||||
return Collections.singletonMap(FS, value);
|
||||
return Collections.singletonMap(getKeyNames().get(0), value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getKeyNames() {
|
||||
return Collections.singletonList(FS);
|
||||
return Collections.singletonList(GATKVCFConstants.FISHER_STRAND_KEY);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||
return Collections.singletonList(new VCFInfoHeaderLine(FS, 1, VCFHeaderLineType.Float, "Phred-scaled p-value using Fisher's exact test to detect strand bias"));
|
||||
return Collections.singletonList(GATKVCFHeaderLines.getInfoLine(getKeyNames().get(0)));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -269,7 +269,7 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
|
|||
*/
|
||||
private void printTable(final String name, final int[][] table) {
|
||||
if ( ENABLE_DEBUGGING ) {
|
||||
final String pValue = (String)annotationForOneTable(pValueForContingencyTable(table)).get(FS);
|
||||
final String pValue = (String)annotationForOneTable(pValueForContingencyTable(table)).get(getKeyNames().get(0));
|
||||
logger.info(String.format("FS %s (REF+, REF-, ALT+, ALT-) = (%d, %d, %d, %d) = %s",
|
||||
name, table[0][0], table[0][1], table[1][0], table[1][1], pValue));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -58,9 +58,9 @@ import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompa
|
|||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.gatk.utils.BaseUtils;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
|
|
@ -85,14 +85,14 @@ public class GCContent extends InfoFieldAnnotation {
|
|||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
double content = computeGCContent(ref);
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
Map<String, Object> map = new HashMap<>();
|
||||
map.put(getKeyNames().get(0), String.format("%.2f", content));
|
||||
return map;
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList("GC"); }
|
||||
public List<String> getKeyNames() { return Arrays.asList(GATKVCFConstants.GC_CONTENT_KEY); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("GC", 1, VCFHeaderLineType.Integer, "GC content around the variant (see docs for window size details)")); }
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(); }
|
||||
|
||||
public boolean useZeroQualityReads() { return false; }
|
||||
|
||||
|
|
|
|||
|
|
@ -61,8 +61,9 @@ import org.broadinstitute.gatk.utils.MathUtils;
|
|||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import htsjdk.variant.variantcontext.Genotype;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -74,12 +75,6 @@ import java.util.*;
|
|||
|
||||
public class GenotypeSummaries extends InfoFieldAnnotation implements ActiveRegionBasedAnnotation {
|
||||
|
||||
public final static String CCC = "CCC";
|
||||
public final static String NCC = "NCC";
|
||||
public final static String HWP = "HWP";
|
||||
public final static String GQ_MEAN = "GQ_MEAN";
|
||||
public final static String GQ_STDDEV = "GQ_STDDEV";
|
||||
|
||||
@Override
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
|
|
@ -91,7 +86,7 @@ public class GenotypeSummaries extends InfoFieldAnnotation implements ActiveRegi
|
|||
return null;
|
||||
|
||||
final Map<String,Object> returnMap = new HashMap<>();
|
||||
returnMap.put(NCC, vc.getNoCallCount());
|
||||
returnMap.put(GATKVCFConstants.NOCALL_CHROM_KEY, vc.getNoCallCount());
|
||||
|
||||
final MathUtils.RunningAverage average = new MathUtils.RunningAverage();
|
||||
for( final Genotype g : vc.getGenotypes() ) {
|
||||
|
|
@ -100,9 +95,9 @@ public class GenotypeSummaries extends InfoFieldAnnotation implements ActiveRegi
|
|||
}
|
||||
}
|
||||
if( average.observationCount() > 0L ) {
|
||||
returnMap.put(GQ_MEAN, String.format("%.2f", average.mean()));
|
||||
returnMap.put(GATKVCFConstants.GQ_MEAN_KEY, String.format("%.2f", average.mean()));
|
||||
if( average.observationCount() > 1L ) {
|
||||
returnMap.put(GQ_STDDEV, String.format("%.2f", average.stddev()));
|
||||
returnMap.put(GATKVCFConstants.GQ_STDEV_KEY, String.format("%.2f", average.stddev()));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -111,17 +106,9 @@ public class GenotypeSummaries extends InfoFieldAnnotation implements ActiveRegi
|
|||
|
||||
@Override
|
||||
public List<String> getKeyNames() {
|
||||
return Arrays.asList(CCC, NCC, HWP, GQ_MEAN, GQ_STDDEV);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||
return Arrays.asList(
|
||||
new VCFInfoHeaderLine(CCC, 1, VCFHeaderLineType.Integer, "Number of called chromosomes"),
|
||||
new VCFInfoHeaderLine(NCC, 1, VCFHeaderLineType.Integer, "Number of no-called samples"),
|
||||
new VCFInfoHeaderLine(HWP, 1, VCFHeaderLineType.Float, "P value from test of Hardy Weinberg Equilibrium"),
|
||||
new VCFInfoHeaderLine(GQ_MEAN, 1, VCFHeaderLineType.Float, "Mean of all GQ values"),
|
||||
new VCFInfoHeaderLine(GQ_STDDEV, 1, VCFHeaderLineType.Float, "Standard deviation of all GQ values")
|
||||
);
|
||||
GATKVCFConstants.NOCALL_CHROM_KEY,
|
||||
GATKVCFConstants.GQ_MEAN_KEY,
|
||||
GATKVCFConstants.GQ_STDEV_KEY);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -64,7 +64,6 @@ import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
|||
import org.broadinstitute.gatk.utils.BaseUtils;
|
||||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
import org.broadinstitute.gatk.utils.QualityUtils;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
import org.broadinstitute.gatk.utils.pileup.PileupElement;
|
||||
|
|
@ -73,6 +72,8 @@ import org.broadinstitute.gatk.utils.sam.AlignmentUtils;
|
|||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import htsjdk.variant.variantcontext.Genotype;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.*;
|
||||
|
|
@ -151,7 +152,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
}
|
||||
|
||||
// annotate the score in the info field
|
||||
final Map<String, Object> map = new HashMap<String, Object>();
|
||||
final Map<String, Object> map = new HashMap<>();
|
||||
map.put(getKeyNames().get(0), String.format("%.4f", scoreRA.mean()));
|
||||
return map;
|
||||
}
|
||||
|
|
@ -173,8 +174,8 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
|
||||
int haplotypesToCompute = vc.getAlternateAlleles().size() + 1;
|
||||
|
||||
final PriorityQueue<Haplotype> candidateHaplotypeQueue = new PriorityQueue<Haplotype>(100, new HaplotypeComparator());
|
||||
final PriorityQueue<Haplotype> consensusHaplotypeQueue = new PriorityQueue<Haplotype>(MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER, new HaplotypeComparator());
|
||||
final PriorityQueue<Haplotype> candidateHaplotypeQueue = new PriorityQueue<>(100, new HaplotypeComparator());
|
||||
final PriorityQueue<Haplotype> consensusHaplotypeQueue = new PriorityQueue<>(MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER, new HaplotypeComparator());
|
||||
|
||||
for (final PileupElement p : pileup) {
|
||||
final Haplotype haplotypeFromRead = getHaplotypeFromRead(p, contextSize, locus);
|
||||
|
|
@ -214,7 +215,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
// The consensus haplotypes are in a quality-ordered priority queue, so the best haplotypes are just the ones at the front of the queue
|
||||
final Haplotype haplotype1 = consensusHaplotypeQueue.poll();
|
||||
|
||||
List<Haplotype> hlist = new ArrayList<Haplotype>();
|
||||
List<Haplotype> hlist = new ArrayList<>();
|
||||
hlist.add(new Haplotype(haplotype1.getBases(), 60));
|
||||
|
||||
for (int k = 1; k < haplotypesToCompute; k++) {
|
||||
|
|
@ -329,7 +330,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
if (DEBUG) System.out.printf("HAP1: %s%n", haplotypes.get(0));
|
||||
if (DEBUG) System.out.printf("HAP2: %s%n", haplotypes.get(1));
|
||||
|
||||
final ArrayList<double[]> haplotypeScores = new ArrayList<double[]>();
|
||||
final ArrayList<double[]> haplotypeScores = new ArrayList<>();
|
||||
for (final PileupElement p : pileup) {
|
||||
// Score all the reads in the pileup, even the filtered ones
|
||||
final double[] scores = new double[haplotypes.size()];
|
||||
|
|
@ -412,12 +413,12 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
|||
|
||||
@Override
|
||||
public List<String> getKeyNames() {
|
||||
return Arrays.asList("HaplotypeScore");
|
||||
return Arrays.asList(GATKVCFConstants.HAPLOTYPE_SCORE_KEY);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||
return Arrays.asList(new VCFInfoHeaderLine("HaplotypeScore", 1, VCFHeaderLineType.Float, "Consistency of the site with at most two segregating haplotypes"));
|
||||
return Arrays.asList(GATKVCFHeaderLines.getInfoLine(getKeyNames().get(0)));
|
||||
}
|
||||
|
||||
private static class Haplotype {
|
||||
|
|
|
|||
|
|
@ -61,11 +61,12 @@ import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
|||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.QualityUtils;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import htsjdk.variant.variantcontext.Genotype;
|
||||
import htsjdk.variant.variantcontext.GenotypesContext;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -137,14 +138,14 @@ public class HardyWeinberg extends InfoFieldAnnotation implements ExperimentalAn
|
|||
|
||||
double pvalue = HardyWeinbergCalculation.hwCalculate(refCount, hetCount, homCount);
|
||||
//System.out.println(refCount + " " + hetCount + " " + homCount + " " + pvalue);
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
Map<String, Object> map = new HashMap<>();
|
||||
map.put(getKeyNames().get(0), String.format("%.1f", QualityUtils.phredScaleErrorRate(pvalue)));
|
||||
return map;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getKeyNames() { return Arrays.asList("HW"); }
|
||||
public List<String> getKeyNames() { return Arrays.asList(GATKVCFConstants.HARDY_WEINBERG_KEY); }
|
||||
|
||||
@Override
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("HW", 1, VCFHeaderLineType.Float, "Phred-scaled p-value for Hardy-Weinberg violation")); }
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(GATKVCFHeaderLines.getInfoLine(getKeyNames().get(0))); }
|
||||
}
|
||||
|
|
@ -59,9 +59,10 @@ import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ExperimentalAn
|
|||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
|
|
@ -103,14 +104,14 @@ public class HomopolymerRun extends InfoFieldAnnotation implements ExperimentalA
|
|||
return null;
|
||||
}
|
||||
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
Map<String, Object> map = new HashMap<>();
|
||||
map.put(getKeyNames().get(0), String.format("%d", run));
|
||||
return map;
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList("HRun"); }
|
||||
public List<String> getKeyNames() { return Arrays.asList(GATKVCFConstants.HOMOPOLYMER_RUN_KEY); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("HRun", 1, VCFHeaderLineType.Integer, "Largest Contiguous Homopolymer Run of Variant Allele In Either Direction")); }
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(GATKVCFHeaderLines.getInfoLine(getKeyNames().get(0))); }
|
||||
|
||||
public boolean useZeroQualityReads() { return false; }
|
||||
|
||||
|
|
|
|||
|
|
@ -63,11 +63,12 @@ import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnot
|
|||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import htsjdk.variant.variantcontext.Genotype;
|
||||
import htsjdk.variant.variantcontext.GenotypesContext;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -88,7 +89,6 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno
|
|||
|
||||
private final static Logger logger = Logger.getLogger(InbreedingCoeff.class);
|
||||
private static final int MIN_SAMPLES = 10;
|
||||
private static final String INBREEDING_COEFFICIENT_KEY_NAME = "InbreedingCoeff";
|
||||
private Set<String> founderIds;
|
||||
private int sampleCount;
|
||||
private boolean pedigreeCheckWarningLogged = false;
|
||||
|
|
@ -182,8 +182,8 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno
|
|||
}
|
||||
|
||||
@Override
|
||||
public List<String> getKeyNames() { return Collections.singletonList(INBREEDING_COEFFICIENT_KEY_NAME); }
|
||||
public List<String> getKeyNames() { return Collections.singletonList(GATKVCFConstants.INBREEDING_COEFFICIENT_KEY); }
|
||||
|
||||
@Override
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Collections.singletonList(new VCFInfoHeaderLine(INBREEDING_COEFFICIENT_KEY_NAME, 1, VCFHeaderLineType.Float, "Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation")); }
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Collections.singletonList(GATKVCFHeaderLines.getInfoLine(getKeyNames().get(0))); }
|
||||
}
|
||||
|
|
@ -53,8 +53,9 @@ package org.broadinstitute.gatk.tools.walkers.annotator;
|
|||
|
||||
import org.broadinstitute.gatk.utils.genotyper.MostLikelyAllele;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
|
@ -73,10 +74,10 @@ import java.util.List;
|
|||
*/
|
||||
public class LikelihoodRankSumTest extends RankSumTest {
|
||||
@Override
|
||||
public List<String> getKeyNames() { return Arrays.asList("LikelihoodRankSum"); }
|
||||
public List<String> getKeyNames() { return Arrays.asList(GATKVCFConstants.LIKELIHOOD_RANK_SUM_KEY); }
|
||||
|
||||
@Override
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("LikelihoodRankSum", 1, VCFHeaderLineType.Float, "Z-score from Wilcoxon rank sum test of Alt Vs. Ref haplotype likelihoods")); }
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(GATKVCFHeaderLines.getInfoLine(getKeyNames().get(0))); }
|
||||
|
||||
@Override
|
||||
protected Double getElementForRead(final GATKSAMRecord read, final int refLoc, final MostLikelyAllele mostLikelyAllele) {
|
||||
|
|
|
|||
|
|
@ -62,9 +62,10 @@ import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnot
|
|||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.RodRequiringAnnotation;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.gatk.engine.samples.MendelianViolation;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -95,7 +96,6 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements RodRequiri
|
|||
|
||||
private final static Logger logger = Logger.getLogger(MVLikelihoodRatio.class);
|
||||
private MendelianViolation mendelianViolation = null;
|
||||
public static final String MVLR_KEY = "MVLR";
|
||||
private Set<Trio> trios;
|
||||
private boolean walkerIdentityCheckWarningLogged = false;
|
||||
private boolean pedigreeCheckWarningLogged = false;
|
||||
|
|
@ -132,7 +132,7 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements RodRequiri
|
|||
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
|
||||
}
|
||||
|
||||
Map<String,Object> attributeMap = new HashMap<String,Object>(1);
|
||||
Map<String,Object> attributeMap = new HashMap<>(1);
|
||||
//double pNoMV = 1.0;
|
||||
double maxMVLR = Double.MIN_VALUE;
|
||||
for ( Trio trio : trios ) {
|
||||
|
|
@ -146,17 +146,16 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements RodRequiri
|
|||
//double pSomeMV = 1.0-pNoMV;
|
||||
//toRet.put("MVLR",Math.log10(pSomeMV)-Math.log10(1.0-pSomeMV));
|
||||
if ( Double.compare(maxMVLR,Double.MIN_VALUE) != 0 )
|
||||
attributeMap.put(MVLR_KEY,maxMVLR);
|
||||
attributeMap.put(getKeyNames().get(0), maxMVLR);
|
||||
return attributeMap;
|
||||
}
|
||||
|
||||
// return the names and descriptions used for the VCF INFO meta field
|
||||
// return the descriptions used for the VCF INFO meta field
|
||||
@Override
|
||||
public List<String> getKeyNames() { return Arrays.asList(MVLR_KEY); }
|
||||
public List<String> getKeyNames() { return Arrays.asList(GATKVCFConstants.MENDEL_VIOLATION_LR_KEY); }
|
||||
|
||||
@Override
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(MVLR_KEY, 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(GATKVCFHeaderLines.getInfoLine(getKeyNames().get(0))); }
|
||||
|
||||
private boolean contextHasTrioLikelihoods(VariantContext context, Trio trio) {
|
||||
for ( String sample : Arrays.asList(trio.getMaternalID(),trio.getPaternalID(),trio.getChildID()) ) {
|
||||
|
|
|
|||
|
|
@ -53,9 +53,10 @@ package org.broadinstitute.gatk.tools.walkers.annotator;
|
|||
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.gatk.utils.pileup.PileupElement;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -80,10 +81,10 @@ import java.util.*;
|
|||
*/
|
||||
public class MappingQualityRankSumTest extends RankSumTest implements StandardAnnotation {
|
||||
@Override
|
||||
public List<String> getKeyNames() { return Arrays.asList("MQRankSum"); }
|
||||
public List<String> getKeyNames() { return Arrays.asList(GATKVCFConstants.MAP_QUAL_RANK_SUM_KEY); }
|
||||
|
||||
@Override
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MQRankSum", 1, VCFHeaderLineType.Float, "Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities")); }
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(GATKVCFHeaderLines.getInfoLine(getKeyNames().get(0))); }
|
||||
|
||||
@Override
|
||||
protected Double getElementForRead(final GATKSAMRecord read, final int refLoc) {
|
||||
|
|
|
|||
|
|
@ -64,9 +64,8 @@ import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
|||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.engine.samples.MendelianViolation;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -97,10 +96,8 @@ public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringA
|
|||
private final static Logger logger = Logger.getLogger(PossibleDeNovo.class);
|
||||
|
||||
private MendelianViolation mendelianViolation = null;
|
||||
public static final String HI_CONF_DENOVO_KEY = "hiConfDeNovo";
|
||||
public static final String LO_CONF_DENOVO_KEY = "loConfDeNovo";
|
||||
private final int hi_GQ_threshold = 20;
|
||||
private final int lo_GQ_threshold = 10;
|
||||
private final int hi_GQ_threshold = 20; //WARNING - If you change this value, update the description in GATKVCFHeaderLines
|
||||
private final int lo_GQ_threshold = 10; //WARNING - If you change this value, update the description in GATKVCFHeaderLines
|
||||
private final double percentOfSamplesCutoff = 0.001; //for many, many samples use 0.1% of samples as allele frequency threshold for de novos
|
||||
private final int flatNumberOfSamplesCutoff = 4;
|
||||
private Set<Trio> trios;
|
||||
|
|
@ -137,11 +134,11 @@ public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringA
|
|||
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
|
||||
}
|
||||
|
||||
final Map<String,Object> attributeMap = new HashMap<String,Object>(1);
|
||||
final Map<String,Object> attributeMap = new HashMap<>(1);
|
||||
boolean isHighConfDeNovo = false;
|
||||
boolean isLowConfDeNovo = false;
|
||||
final List<String> highConfDeNovoChildren = new ArrayList<String>();
|
||||
final List<String> lowConfDeNovoChildren = new ArrayList<String>();
|
||||
final List<String> highConfDeNovoChildren = new ArrayList<>();
|
||||
final List<String> lowConfDeNovoChildren = new ArrayList<>();
|
||||
for ( final Trio trio : trios ) {
|
||||
if (vc.isBiallelic() && contextHasTrioLikelihoods(vc,trio) && mendelianViolation.isViolation(trio.getMother(),trio.getFather(),trio.getChild(),vc) )
|
||||
{
|
||||
|
|
@ -164,20 +161,15 @@ public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringA
|
|||
final double AFcutoff = Math.max(flatNumberOfSamplesCutoff,percentNumberOfSamplesCutoff);
|
||||
final int deNovoAlleleCount = vc.getCalledChrCount(vc.getAlternateAllele(0)); //we assume we're biallelic above so use the first alt
|
||||
if ( isHighConfDeNovo && deNovoAlleleCount < AFcutoff )
|
||||
attributeMap.put(HI_CONF_DENOVO_KEY,highConfDeNovoChildren);
|
||||
attributeMap.put(GATKVCFConstants.HI_CONF_DENOVO_KEY,highConfDeNovoChildren);
|
||||
if ( isLowConfDeNovo && deNovoAlleleCount < AFcutoff )
|
||||
attributeMap.put(LO_CONF_DENOVO_KEY,lowConfDeNovoChildren);
|
||||
attributeMap.put(GATKVCFConstants.LO_CONF_DENOVO_KEY,lowConfDeNovoChildren);
|
||||
return attributeMap;
|
||||
}
|
||||
|
||||
// return the descriptions used for the VCF INFO meta field
|
||||
@Override
|
||||
public List<String> getKeyNames() { return Arrays.asList(HI_CONF_DENOVO_KEY,LO_CONF_DENOVO_KEY); }
|
||||
|
||||
@Override
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(HI_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "High confidence possible de novo mutation (GQ >= "+hi_GQ_threshold+" for all trio members)=[comma-delimited list of child samples]"),
|
||||
new VCFInfoHeaderLine(LO_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "Low confidence possible de novo mutation (GQ >= "+lo_GQ_threshold+" for child, GQ > 0 for parents)=[comma-delimited list of child samples]")); }
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList(GATKVCFConstants.HI_CONF_DENOVO_KEY, GATKVCFConstants.LO_CONF_DENOVO_KEY); }
|
||||
|
||||
private boolean contextHasTrioLikelihoods(VariantContext context, Trio trio) {
|
||||
for ( String sample : Arrays.asList(trio.getMaternalID(),trio.getPaternalID(),trio.getChildID()) ) {
|
||||
|
|
|
|||
|
|
@ -61,8 +61,9 @@ import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnot
|
|||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import htsjdk.variant.variantcontext.Genotype;
|
||||
import htsjdk.variant.variantcontext.GenotypesContext;
|
||||
|
|
@ -197,10 +198,10 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
|
|||
private final static double IDEAL_HIGH_QD = 30;
|
||||
private final static double JITTER_SIGMA = 3;
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList("QD"); }
|
||||
public List<String> getKeyNames() { return Arrays.asList(GATKVCFConstants.QUAL_BY_DEPTH_KEY); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||
return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth"));
|
||||
return Arrays.asList(GATKVCFHeaderLines.getInfoLine(getKeyNames().get(0)));
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -57,12 +57,13 @@ import htsjdk.samtools.CigarOperator;
|
|||
import htsjdk.samtools.SAMRecord;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.gatk.tools.walkers.indels.PairHMMIndelErrorModel;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.gatk.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.gatk.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.gatk.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -85,11 +86,11 @@ import java.util.*;
|
|||
public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotation {
|
||||
|
||||
@Override
|
||||
public List<String> getKeyNames() { return Arrays.asList("ReadPosRankSum"); }
|
||||
public List<String> getKeyNames() { return Arrays.asList(GATKVCFConstants.READ_POS_RANK_SUM_KEY); }
|
||||
|
||||
@Override
|
||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||
return Arrays.asList(new VCFInfoHeaderLine("ReadPosRankSum", 1, VCFHeaderLineType.Float, "Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias"));
|
||||
return Arrays.asList(GATKVCFHeaderLines.getInfoLine(getKeyNames().get(0)));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -57,11 +57,11 @@ import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineCount;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import htsjdk.variant.variantcontext.Genotype;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
|
|
@ -84,7 +84,7 @@ public class SampleList extends InfoFieldAnnotation {
|
|||
if ( vc.isMonomorphicInSamples() || !vc.hasGenotypes() )
|
||||
return null;
|
||||
|
||||
StringBuffer samples = new StringBuffer();
|
||||
final StringBuilder samples = new StringBuilder();
|
||||
for ( Genotype genotype : vc.getGenotypesOrderedByName() ) {
|
||||
if ( genotype.isCalled() && !genotype.isHomRef() ){
|
||||
if ( samples.length() > 0 )
|
||||
|
|
@ -97,11 +97,11 @@ public class SampleList extends InfoFieldAnnotation {
|
|||
return null;
|
||||
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put("Samples", samples.toString());
|
||||
map.put(getKeyNames().get(0), samples.toString());
|
||||
return map;
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList("Samples"); }
|
||||
public List<String> getKeyNames() { return Arrays.asList(GATKVCFConstants.SAMPLE_LIST_KEY); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("Samples", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "List of polymorphic samples")); }
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(GATKVCFHeaderLines.getInfoLine(getKeyNames().get(0))); }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -61,9 +61,10 @@ import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
|||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.pileup.PileupElement;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -121,14 +122,14 @@ public class SpanningDeletions extends InfoFieldAnnotation implements StandardAn
|
|||
deletions++;
|
||||
}
|
||||
}
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
Map<String, Object> map = new HashMap<>();
|
||||
map.put(getKeyNames().get(0), String.format("%.2f", depth == 0 ? 0.0 : (double)deletions/(double)depth));
|
||||
return map;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getKeyNames() { return Arrays.asList("Dels"); }
|
||||
public List<String> getKeyNames() { return Arrays.asList(GATKVCFConstants.SPANNING_DELETIONS_KEY); }
|
||||
|
||||
@Override
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("Dels", 1, VCFHeaderLineType.Float, "Fraction of Reads Containing Spanning Deletions")); }
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(GATKVCFHeaderLines.getInfoLine(getKeyNames().get(0))); }
|
||||
}
|
||||
|
|
@ -56,8 +56,6 @@ import htsjdk.variant.variantcontext.Genotype;
|
|||
import htsjdk.variant.variantcontext.GenotypeBuilder;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import htsjdk.variant.vcf.VCFFormatHeaderLine;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineCount;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.GenotypeAnnotation;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
|
|
@ -66,6 +64,8 @@ import org.broadinstitute.gatk.utils.genotyper.MostLikelyAllele;
|
|||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
|
@ -99,8 +99,6 @@ import java.util.Map;
|
|||
|
||||
public class StrandAlleleCountsBySample extends GenotypeAnnotation {
|
||||
|
||||
public final static String STRAND_COUNT_BY_SAMPLE_KEY_NAME = "SAC";
|
||||
|
||||
@Override
|
||||
public void annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
|
|
@ -113,17 +111,15 @@ public class StrandAlleleCountsBySample extends GenotypeAnnotation {
|
|||
if ( ! isAppropriateInput(alleleLikelihoodMap, g) )
|
||||
return;
|
||||
|
||||
gb.attribute(STRAND_COUNT_BY_SAMPLE_KEY_NAME, getStrandCounts(Collections.singletonMap(g.getSampleName(), alleleLikelihoodMap), vc));
|
||||
gb.attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, getStrandCounts(Collections.singletonMap(g.getSampleName(), alleleLikelihoodMap), vc));
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getKeyNames() { return Collections.singletonList(STRAND_COUNT_BY_SAMPLE_KEY_NAME); }
|
||||
public List<String> getKeyNames() { return Collections.singletonList(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY); }
|
||||
|
||||
@Override
|
||||
public List<VCFFormatHeaderLine> getDescriptions() {
|
||||
return Collections.singletonList(
|
||||
new VCFFormatHeaderLine(getKeyNames().get(0), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer,
|
||||
"Number of reads on the forward and reverse strand supporting each allele (including reference)"));
|
||||
return Collections.singletonList(GATKVCFHeaderLines.getFormatLine(getKeyNames().get(0)));
|
||||
}
|
||||
|
||||
private boolean isAppropriateInput(final PerReadAlleleLikelihoodMap map, final Genotype g) {
|
||||
|
|
|
|||
|
|
@ -61,7 +61,8 @@ import htsjdk.variant.variantcontext.Genotype;
|
|||
import htsjdk.variant.variantcontext.GenotypeBuilder;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import htsjdk.variant.vcf.VCFFormatHeaderLine;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -92,8 +93,6 @@ import java.util.*;
|
|||
|
||||
public class StrandBiasBySample extends GenotypeAnnotation {
|
||||
|
||||
public final static String STRAND_BIAS_BY_SAMPLE_KEY_NAME = "SB";
|
||||
|
||||
@Override
|
||||
public void annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
|
|
@ -108,14 +107,14 @@ public class StrandBiasBySample extends GenotypeAnnotation {
|
|||
|
||||
final int[][] table = FisherStrand.getContingencyTable(Collections.singletonMap(g.getSampleName(), alleleLikelihoodMap), vc, 0);
|
||||
|
||||
gb.attribute(STRAND_BIAS_BY_SAMPLE_KEY_NAME, FisherStrand.getContingencyArray(table));
|
||||
gb.attribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY, FisherStrand.getContingencyArray(table));
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getKeyNames() { return Collections.singletonList(STRAND_BIAS_BY_SAMPLE_KEY_NAME); }
|
||||
public List<String> getKeyNames() { return Collections.singletonList(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY); }
|
||||
|
||||
@Override
|
||||
public List<VCFFormatHeaderLine> getDescriptions() { return Collections.singletonList(new VCFFormatHeaderLine(getKeyNames().get(0), 4, VCFHeaderLineType.Integer, "Per-sample component statistics which comprise the Fisher's Exact Test to detect strand bias.")); }
|
||||
public List<VCFFormatHeaderLine> getDescriptions() { return Collections.singletonList(GATKVCFHeaderLines.getFormatLine(getKeyNames().get(0))); }
|
||||
|
||||
private boolean isAppropriateInput(final PerReadAlleleLikelihoodMap map, final Genotype g) {
|
||||
return ! (map == null || g == null || !g.isCalled());
|
||||
|
|
|
|||
|
|
@ -70,6 +70,7 @@ import org.broadinstitute.gatk.utils.genotyper.MostLikelyAllele;
|
|||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.gatk.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -92,7 +93,7 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
|
|||
for ( final VCFHeaderLine line : headerLines) {
|
||||
if ( line instanceof VCFFormatHeaderLine) {
|
||||
final VCFFormatHeaderLine formatline = (VCFFormatHeaderLine)line;
|
||||
if ( formatline.getID().equals(VCFConstants.STRAND_BIAS_KEY) ) {
|
||||
if ( formatline.getID().equals(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY) ) {
|
||||
logger.warn("StrandBiasBySample annotation exists in input VCF header. Attempting to use StrandBiasBySample " +
|
||||
"values to calculate strand bias annotation values. If no sample has the SB genotype annotation, annotation may still fail.");
|
||||
return;
|
||||
|
|
@ -123,7 +124,7 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
|
|||
// if the genotype and strand bias are provided, calculate the annotation from the Genotype (GT) field
|
||||
if ( vc.hasGenotypes() ) {
|
||||
for (final Genotype g : vc.getGenotypes()) {
|
||||
if (g.hasAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME)) {
|
||||
if (g.hasAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY)) {
|
||||
return calculateAnnotationFromGTfield(vc.getGenotypes());
|
||||
}
|
||||
}
|
||||
|
|
@ -176,11 +177,11 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
|
|||
boolean foundData = false;
|
||||
|
||||
for( final Genotype g : genotypes ) {
|
||||
if( g.isNoCall() || ! g.hasAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME) )
|
||||
if( g.isNoCall() || ! g.hasAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY) )
|
||||
continue;
|
||||
|
||||
foundData = true;
|
||||
final String sbbsString = (String) g.getAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME);
|
||||
final String sbbsString = (String) g.getAnyAttribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY);
|
||||
final int[] data = encodeSBBS(sbbsString);
|
||||
if ( passesMinimumThreshold(data, minCount) ) {
|
||||
for( int index = 0; index < sbArray.length; index++ ) {
|
||||
|
|
|
|||
|
|
@ -57,8 +57,9 @@ import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ActiveRegionBa
|
|||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -99,8 +100,6 @@ public class StrandOddsRatio extends StrandBiasTest implements StandardAnnotatio
|
|||
private final static double AUGMENTATION_CONSTANT = 1.0;
|
||||
private static final int MIN_COUNT = 0;
|
||||
|
||||
private static final String SOR = "SOR";
|
||||
|
||||
@Override
|
||||
protected Map<String, Object> calculateAnnotationFromGTfield(GenotypesContext genotypes){
|
||||
final int[][] tableFromPerSampleAnnotations = getTableFromSamples( genotypes, MIN_COUNT );
|
||||
|
|
@ -179,16 +178,16 @@ public class StrandOddsRatio extends StrandBiasTest implements StandardAnnotatio
|
|||
*/
|
||||
protected Map<String, Object> annotationForOneTable(final double ratio) {
|
||||
final Object value = String.format("%.3f", ratio);
|
||||
return Collections.singletonMap(SOR, value);
|
||||
return Collections.singletonMap(getKeyNames().get(0), value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||
return Collections.singletonList(new VCFInfoHeaderLine(SOR, 1, VCFHeaderLineType.Float, "Symmetric Odds Ratio of 2x2 contingency table to detect strand bias"));
|
||||
return Collections.singletonList(GATKVCFHeaderLines.getInfoLine(getKeyNames().get(0)));
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getKeyNames() {
|
||||
return Collections.singletonList(SOR);
|
||||
return Collections.singletonList(GATKVCFConstants.STRAND_ODDS_RATIO_KEY);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -61,11 +61,9 @@ import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnota
|
|||
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.HaplotypeCaller;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.gatk.utils.collections.Pair;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineCount;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -85,9 +83,6 @@ import java.util.*;
|
|||
*/
|
||||
public class TandemRepeatAnnotator extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
private final static Logger logger = Logger.getLogger(TandemRepeatAnnotator.class);
|
||||
private static final String STR_PRESENT = "STR";
|
||||
private static final String REPEAT_UNIT_KEY = "RU";
|
||||
private static final String REPEATS_PER_ALLELE_KEY = "RPA";
|
||||
private boolean walkerIdentityCheckWarningLogged = false;
|
||||
|
||||
@Override
|
||||
|
|
@ -110,33 +105,27 @@ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements Standa
|
|||
if ( !vc.isIndel())
|
||||
return null;
|
||||
|
||||
Pair<List<Integer>,byte[]> result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, ref.getForwardBases());
|
||||
final Pair<List<Integer>,byte[]> result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, ref.getForwardBases());
|
||||
if (result == null)
|
||||
return null;
|
||||
|
||||
byte[] repeatUnit = result.second;
|
||||
List<Integer> numUnits = result.first;
|
||||
final byte[] repeatUnit = result.second;
|
||||
final List<Integer> numUnits = result.first;
|
||||
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(STR_PRESENT,true);
|
||||
map.put(REPEAT_UNIT_KEY,new String(repeatUnit));
|
||||
map.put(REPEATS_PER_ALLELE_KEY, numUnits);
|
||||
final Map<String, Object> map = new HashMap<>();
|
||||
map.put(GATKVCFConstants.STR_PRESENT_KEY, true);
|
||||
map.put(GATKVCFConstants.REPEAT_UNIT_KEY, new String(repeatUnit));
|
||||
map.put(GATKVCFConstants.REPEATS_PER_ALLELE_KEY, numUnits);
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
protected static final String[] keyNames = {STR_PRESENT, REPEAT_UNIT_KEY,REPEATS_PER_ALLELE_KEY };
|
||||
protected static final VCFInfoHeaderLine[] descriptions = {
|
||||
new VCFInfoHeaderLine(STR_PRESENT, 0, VCFHeaderLineType.Flag, "Variant is a short tandem repeat"),
|
||||
new VCFInfoHeaderLine(REPEAT_UNIT_KEY, 1, VCFHeaderLineType.String, "Tandem repeat unit (bases)"),
|
||||
new VCFInfoHeaderLine(REPEATS_PER_ALLELE_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Number of times tandem repeat unit is repeated, for each allele (including reference)") };
|
||||
|
||||
@Override
|
||||
public List<String> getKeyNames() {
|
||||
return Arrays.asList(keyNames);
|
||||
return Arrays.asList(
|
||||
GATKVCFConstants.STR_PRESENT_KEY,
|
||||
GATKVCFConstants.REPEAT_UNIT_KEY,
|
||||
GATKVCFConstants.REPEATS_PER_ALLELE_KEY);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(descriptions); }
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -61,10 +61,10 @@ import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnot
|
|||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.RodRequiringAnnotation;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineCount;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -125,8 +125,8 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen
|
|||
}
|
||||
}
|
||||
|
||||
final Map<String, Object> toRet = new HashMap<String, Object>(1);
|
||||
final HashSet<Sample> triosToTest = new HashSet<Sample>();
|
||||
final Map<String, Object> toRet = new HashMap<>(1);
|
||||
final HashSet<Sample> triosToTest = new HashSet<>();
|
||||
|
||||
for( final Sample child : trios ) {
|
||||
final boolean hasAppropriateGenotypes = vc.hasGenotype(child.getID()) && vc.getGenotype(child.getID()).hasLikelihoods() &&
|
||||
|
|
@ -146,15 +146,15 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen
|
|||
|
||||
// return the descriptions used for the VCF INFO meta field
|
||||
@Override
|
||||
public List<String> getKeyNames() { return Arrays.asList("TDT"); }
|
||||
public List<String> getKeyNames() { return Arrays.asList(GATKVCFConstants.TRANSMISSION_DISEQUILIBRIUM_KEY); }
|
||||
|
||||
@Override
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("TDT", VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Test statistic from Wittkowski transmission disequilibrium test.")); }
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(GATKVCFHeaderLines.getInfoLine(getKeyNames().get(0))); }
|
||||
|
||||
// Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT
|
||||
private List<Double> calculateTDT( final VariantContext vc, final Set<Sample> triosToTest ) {
|
||||
|
||||
List<Double> pairwiseTDTs = new ArrayList<Double>(10);
|
||||
List<Double> pairwiseTDTs = new ArrayList<>(10);
|
||||
final int HomRefIndex = 0;
|
||||
|
||||
// for each pair of alleles, add the likelihoods
|
||||
|
|
|
|||
|
|
@ -58,9 +58,10 @@ import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompa
|
|||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.gatk.utils.IndelUtils;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -100,13 +101,13 @@ public class VariantType extends InfoFieldAnnotation {
|
|||
}
|
||||
}
|
||||
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
Map<String, Object> map = new HashMap<>();
|
||||
map.put(getKeyNames().get(0), String.format("%s", type));
|
||||
return map;
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList("VariantType"); }
|
||||
public List<String> getKeyNames() { return Arrays.asList(GATKVCFConstants.VARIANT_TYPE_KEY); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("VariantType", 1, VCFHeaderLineType.String, "Variant type description")); }
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(GATKVCFHeaderLines.getInfoLine(getKeyNames().get(0))); }
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -70,6 +70,8 @@ import htsjdk.variant.variantcontext.*;
|
|||
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
|
||||
import htsjdk.variant.vcf.*;
|
||||
import org.broadinstitute.gatk.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
|
@ -124,12 +126,6 @@ import java.util.*;
|
|||
@Downsample(by = DownsampleType.NONE)
|
||||
public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
||||
|
||||
private static final String AVG_INTERVAL_DP_KEY = "IDP";
|
||||
private static final String LOW_COVERAGE_LOCI = "LL";
|
||||
private static final String ZERO_COVERAGE_LOCI = "ZL";
|
||||
private static final String GC_CONTENT_KEY = "GC";
|
||||
|
||||
|
||||
@Output(doc = "File to which interval statistics should be written")
|
||||
private VariantContextWriter vcfWriter = null;
|
||||
|
||||
|
|
@ -150,8 +146,8 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
if (getToolkit().getIntervals() == null || getToolkit().getIntervals().isEmpty())
|
||||
throw new UserException("This tool only works if you provide one or more intervals (use the -L argument). If you want to run whole genome, use -T DepthOfCoverage instead.");
|
||||
|
||||
intervalMap = new LinkedHashMap<GenomeLoc, IntervalStratification>(INITIAL_HASH_SIZE);
|
||||
intervalListIterator = new PeekableIterator<GenomeLoc>(getToolkit().getIntervals().iterator());
|
||||
intervalMap = new LinkedHashMap<>(INITIAL_HASH_SIZE);
|
||||
intervalListIterator = new PeekableIterator<>(getToolkit().getIntervals().iterator());
|
||||
|
||||
// get all of the unique sample names for the VCF Header
|
||||
samples = ReadUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
|
||||
|
|
@ -224,7 +220,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
*/
|
||||
private void outputFinishedIntervals(final GenomeLoc refLocus, final byte refBase) {
|
||||
// output any intervals that were finished
|
||||
final List<GenomeLoc> toRemove = new LinkedList<GenomeLoc>();
|
||||
final List<GenomeLoc> toRemove = new LinkedList<>();
|
||||
for (GenomeLoc key : intervalMap.keySet()) {
|
||||
if (key.isBefore(refLocus)) {
|
||||
final IntervalStratification intervalStats = intervalMap.get(key);
|
||||
|
|
@ -263,17 +259,17 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
private void outputStatsToVCF(final IntervalStratification stats, final Allele refAllele) {
|
||||
GenomeLoc interval = stats.getInterval();
|
||||
|
||||
final List<Allele> alleles = new ArrayList<Allele>();
|
||||
final Map<String, Object> attributes = new HashMap<String, Object>();
|
||||
final ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
|
||||
final List<Allele> alleles = new ArrayList<>();
|
||||
final Map<String, Object> attributes = new HashMap<>();
|
||||
final ArrayList<Genotype> genotypes = new ArrayList<>();
|
||||
|
||||
for (String sample : samples) {
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(sample);
|
||||
|
||||
SampleStratification sampleStat = stats.getSampleStatistics(sample);
|
||||
gb.attribute(AVG_INTERVAL_DP_KEY, sampleStat.averageCoverage(interval.size()));
|
||||
gb.attribute(LOW_COVERAGE_LOCI, sampleStat.getNLowCoveredLoci());
|
||||
gb.attribute(ZERO_COVERAGE_LOCI, sampleStat.getNUncoveredLoci());
|
||||
gb.attribute(GATKVCFConstants.AVG_INTERVAL_DP_BY_SAMPLE_KEY, sampleStat.averageCoverage(interval.size()));
|
||||
gb.attribute(GATKVCFConstants.LOW_COVERAGE_LOCI, sampleStat.getNLowCoveredLoci());
|
||||
gb.attribute(GATKVCFConstants.ZERO_COVERAGE_LOCI, sampleStat.getNUncoveredLoci());
|
||||
gb.filters(statusToStrings(stats.getSampleStatistics(sample).callableStatuses(), false));
|
||||
|
||||
genotypes.add(gb.make());
|
||||
|
|
@ -283,11 +279,11 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
VariantContextBuilder vcb = new VariantContextBuilder("DiagnoseTargets", interval.getContig(), interval.getStart(), interval.getStop(), alleles);
|
||||
|
||||
vcb = vcb.log10PError(VariantContext.NO_LOG10_PERROR);
|
||||
vcb.filters(new LinkedHashSet<String>(statusToStrings(stats.callableStatuses(), true)));
|
||||
vcb.filters(new LinkedHashSet<>(statusToStrings(stats.callableStatuses(), true)));
|
||||
|
||||
attributes.put(VCFConstants.END_KEY, interval.getStop());
|
||||
attributes.put(AVG_INTERVAL_DP_KEY, stats.averageCoverage(interval.size()));
|
||||
attributes.put(GC_CONTENT_KEY, stats.gcContent());
|
||||
attributes.put(GATKVCFConstants.AVG_INTERVAL_DP_KEY, stats.averageCoverage(interval.size()));
|
||||
attributes.put(GATKVCFConstants.INTERVAL_GC_CONTENT_KEY, stats.gcContent());
|
||||
|
||||
vcb = vcb.attributes(attributes);
|
||||
vcb = vcb.genotypes(genotypes);
|
||||
|
|
@ -347,7 +343,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
* @return a matching set of strings
|
||||
*/
|
||||
private List<String> statusToStrings(Iterable<CallableStatus> statuses, final boolean isInfoField) {
|
||||
List<String> output = new LinkedList<String>();
|
||||
List<String> output = new LinkedList<>();
|
||||
|
||||
for (CallableStatus status : statuses)
|
||||
if ( isInfoField || status != CallableStatus.PASS )
|
||||
|
|
@ -398,19 +394,19 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
* @return A set of VCF header lines
|
||||
*/
|
||||
private static Set<VCFHeaderLine> getHeaderInfo() {
|
||||
Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>();
|
||||
Set<VCFHeaderLine> headerLines = new HashSet<>();
|
||||
|
||||
// INFO fields for overall data
|
||||
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY));
|
||||
headerLines.add(new VCFInfoHeaderLine(AVG_INTERVAL_DP_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
|
||||
headerLines.add(new VCFInfoHeaderLine(GC_CONTENT_KEY, 1, VCFHeaderLineType.Float, "GC Content of the interval"));
|
||||
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AVG_INTERVAL_DP_KEY));
|
||||
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.INTERVAL_GC_CONTENT_KEY));
|
||||
headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode"));
|
||||
|
||||
// FORMAT fields for each genotype
|
||||
headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY));
|
||||
headerLines.add(new VCFFormatHeaderLine(AVG_INTERVAL_DP_KEY, 1, VCFHeaderLineType.Float, "Average sample depth across the interval. Sum of the sample specific depth in all loci divided by interval size."));
|
||||
headerLines.add(new VCFFormatHeaderLine(LOW_COVERAGE_LOCI, 1, VCFHeaderLineType.Integer, "Number of loci for this sample, in this interval with low coverage (below the minimum coverage) but not zero."));
|
||||
headerLines.add(new VCFFormatHeaderLine(ZERO_COVERAGE_LOCI, 1, VCFHeaderLineType.Integer, "Number of loci for this sample, in this interval with zero coverage."));
|
||||
headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.AVG_INTERVAL_DP_BY_SAMPLE_KEY));
|
||||
headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.LOW_COVERAGE_LOCI));
|
||||
headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.ZERO_COVERAGE_LOCI));
|
||||
|
||||
// FILTER fields
|
||||
for (CallableStatus stat : CallableStatus.values())
|
||||
|
|
|
|||
|
|
@ -54,7 +54,6 @@ package org.broadinstitute.gatk.tools.walkers.genotyper;
|
|||
import htsjdk.samtools.SAMUtils;
|
||||
import htsjdk.variant.variantcontext.Allele;
|
||||
import htsjdk.variant.variantcontext.GenotypeLikelihoods;
|
||||
import htsjdk.variant.vcf.VCFConstants;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.ExactACcounts;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.ExactACset;
|
||||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
|
|
@ -62,6 +61,7 @@ import org.broadinstitute.gatk.utils.collections.Pair;
|
|||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -319,7 +319,7 @@ public abstract class GeneralPloidyGenotypeLikelihoods {
|
|||
iterator.next();
|
||||
}
|
||||
if (VERBOSE) {
|
||||
System.out.println(VCFConstants.MLE_ALLELE_COUNT_KEY + ": " + Arrays.toString(mlInd));
|
||||
System.out.println(GATKVCFConstants.MLE_ALLELE_COUNT_KEY + ": " + Arrays.toString(mlInd));
|
||||
}
|
||||
return new Pair<int[], Double>(mlInd,maxVal);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -60,11 +60,11 @@ import org.broadinstitute.gatk.utils.GenomeLoc;
|
|||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import htsjdk.variant.vcf.VCFConstants;
|
||||
import org.broadinstitute.gatk.utils.collections.Pair;
|
||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
|
||||
import htsjdk.variant.variantcontext.*;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -287,7 +287,7 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G
|
|||
final HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||
|
||||
if (UAC.referenceSampleName != null && perLaneErrorModels != null)
|
||||
attributes.put(VCFConstants.REFSAMPLE_DEPTH_KEY, ErrorModel.getTotalReferenceDepth(perLaneErrorModels));
|
||||
attributes.put(GATKVCFConstants.REFSAMPLE_DEPTH_KEY, ErrorModel.getTotalReferenceDepth(perLaneErrorModels));
|
||||
|
||||
builder.attributes(attributes);
|
||||
// create the genotypes; no-call everyone for now
|
||||
|
|
|
|||
|
|
@ -54,11 +54,8 @@ package org.broadinstitute.gatk.tools.walkers.genotyper;
|
|||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import htsjdk.variant.variantcontext.*;
|
||||
import htsjdk.variant.vcf.VCFConstants;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.StandardCallerArgumentCollection;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
|
|
@ -75,6 +72,8 @@ import org.broadinstitute.gatk.utils.QualityUtils;
|
|||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.broadinstitute.gatk.utils.gga.GenotypingGivenAllelesUtils;
|
||||
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -86,10 +85,6 @@ import java.util.*;
|
|||
*/
|
||||
public abstract class GenotypingEngine<Config extends StandardCallerArgumentCollection> {
|
||||
|
||||
public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY = "NDA";
|
||||
|
||||
public static final String LOW_QUAL_FILTER_NAME = "LowQual";
|
||||
|
||||
protected final AFCalculatorProvider afCalculatorProvider ;
|
||||
|
||||
protected Logger logger;
|
||||
|
|
@ -158,7 +153,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
|||
public Set<VCFInfoHeaderLine> getAppropriateVCFInfoHeaders() {
|
||||
Set<VCFInfoHeaderLine> headerInfo = new HashSet<>();
|
||||
if ( configuration.genotypeArgs.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED )
|
||||
headerInfo.add(new VCFInfoHeaderLine(UnifiedGenotypingEngine.NUMBER_OF_DISCOVERED_ALLELES_KEY, 1, VCFHeaderLineType.Integer, "Number of alternate alleles discovered (but not necessarily genotyped) at this site"));
|
||||
headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.NUMBER_OF_DISCOVERED_ALLELES_KEY));
|
||||
return headerInfo;
|
||||
}
|
||||
|
||||
|
|
@ -262,7 +257,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
|||
//TODO and change the code below accordingly.
|
||||
builder.log10PError(log10Confidence == 0.0 ? -0.0 : log10Confidence);
|
||||
if ( ! passesCallThreshold(phredScaledConfidence) )
|
||||
builder.filter(LOW_QUAL_FILTER_NAME);
|
||||
builder.filter(GATKVCFConstants.LOW_QUAL_FILTER_NAME);
|
||||
|
||||
// create the genotypes
|
||||
|
||||
|
|
@ -571,10 +566,6 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
|||
*/
|
||||
private static AFPriorProvider composeAlleleFrequencyPriorProvider(final int N, final double heterozygosity, final List<Double> inputPriors) {
|
||||
|
||||
final double[] priors = new double[N + 1];
|
||||
double sum = 0.0;
|
||||
final AFPriorProvider result;
|
||||
|
||||
if (!inputPriors.isEmpty()) {
|
||||
// user-specified priors
|
||||
if (inputPriors.size() != N)
|
||||
|
|
@ -652,17 +643,17 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
|||
attributes.putAll(vc.getAttributes());
|
||||
// if the site was down-sampled, record that fact
|
||||
if ( !limitedContext && rawContext.hasPileupBeenDownsampled() )
|
||||
attributes.put(VCFConstants.DOWNSAMPLED_KEY, true);
|
||||
attributes.put(GATKVCFConstants.DOWNSAMPLED_KEY, true);
|
||||
|
||||
// add the MLE AC and AF annotations
|
||||
if ( alleleCountsofMLE.size() > 0 ) {
|
||||
attributes.put(VCFConstants.MLE_ALLELE_COUNT_KEY, alleleCountsofMLE);
|
||||
attributes.put(GATKVCFConstants.MLE_ALLELE_COUNT_KEY, alleleCountsofMLE);
|
||||
final ArrayList<Double> MLEfrequencies = calculateMLEAlleleFrequencies(alleleCountsofMLE, genotypes);
|
||||
attributes.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, MLEfrequencies);
|
||||
attributes.put(GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY, MLEfrequencies);
|
||||
}
|
||||
|
||||
if ( configuration.genotypeArgs.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED )
|
||||
attributes.put(NUMBER_OF_DISCOVERED_ALLELES_KEY, vc.getAlternateAlleles().size());
|
||||
attributes.put(GATKVCFConstants.NUMBER_OF_DISCOVERED_ALLELES_KEY, vc.getAlternateAlleles().size());
|
||||
|
||||
|
||||
return attributes;
|
||||
|
|
@ -674,7 +665,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
|||
for (final Allele a : g.getAlleles())
|
||||
if (!a.isNoCall()) AN++;
|
||||
|
||||
final ArrayList<Double> MLEfrequencies = new ArrayList<Double>(alleleCountsofMLE.size());
|
||||
final ArrayList<Double> MLEfrequencies = new ArrayList<>(alleleCountsofMLE.size());
|
||||
// the MLEAC is allowed to be larger than the AN (e.g. in the case of all PLs being 0, the GT is ./. but the exact model may arbitrarily choose an AC>1)
|
||||
for (final int AC : alleleCountsofMLE )
|
||||
MLEfrequencies.add(Math.min(1.0, (double)AC / (double)AN));
|
||||
|
|
|
|||
|
|
@ -69,6 +69,7 @@ import org.broadinstitute.gatk.utils.pileup.PileupElement;
|
|||
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileupImpl;
|
||||
import htsjdk.variant.variantcontext.*;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -202,7 +203,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
gb.DP(sampleData.depth);
|
||||
gb.alleles(noCall);
|
||||
if (UAC.annotateAllSitesWithPLs)
|
||||
gb.attribute(UnifiedGenotypingEngine.PL_FOR_ALL_SNP_ALLELES_KEY,GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(allLikelihoods, false, true)));
|
||||
gb.attribute(GATKVCFConstants.PL_FOR_ALL_SNP_ALLELES_KEY,GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(allLikelihoods, false, true)));
|
||||
genotypes.add(gb.make());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -79,6 +79,8 @@ import org.broadinstitute.gatk.utils.exceptions.UserException;
|
|||
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.gatk.utils.help.HelpConstants;
|
||||
import org.broadinstitute.gatk.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -336,20 +338,19 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
|
||||
// add the pool values for each genotype
|
||||
if (UAC.genotypeArgs.samplePloidy != GATKVariantContextUtils.DEFAULT_PLOIDY) {
|
||||
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the alternate allele count, in the same order as listed, for each individual sample"));
|
||||
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the alternate allele fraction, in the same order as listed, for each individual sample"));
|
||||
headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY));
|
||||
headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY));
|
||||
}
|
||||
if (UAC.referenceSampleName != null) {
|
||||
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.REFSAMPLE_DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Total reference sample depth"));
|
||||
headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.REFSAMPLE_DEPTH_KEY));
|
||||
}
|
||||
|
||||
if (UAC.annotateAllSitesWithPLs) {
|
||||
headerInfo.add(new VCFFormatHeaderLine(UnifiedGenotypingEngine.PL_FOR_ALL_SNP_ALLELES_KEY, 10, VCFHeaderLineType.Integer, "Phred-scaled genotype likelihoods for all 4 possible bases regardless of whether there is statistical evidence for them. Ordering is always PL for AA AC CC GA GC GG TA TC TG TT."));
|
||||
headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.PL_FOR_ALL_SNP_ALLELES_KEY));
|
||||
}
|
||||
VCFStandardHeaderLines.addStandardInfoLines(headerInfo, true,
|
||||
VCFConstants.DOWNSAMPLED_KEY,
|
||||
VCFConstants.MLE_ALLELE_COUNT_KEY,
|
||||
VCFConstants.MLE_ALLELE_FREQUENCY_KEY);
|
||||
headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.DOWNSAMPLED_KEY));
|
||||
headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_COUNT_KEY));
|
||||
headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));
|
||||
|
||||
// also, check to see whether comp rods were included
|
||||
if ( dbsnp != null && dbsnp.dbsnp.isBound() )
|
||||
|
|
@ -364,7 +365,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
|
||||
// FILTER fields are added unconditionally as it's not always 100% certain the circumstances
|
||||
// where the filters are used. For example, in emitting all sites the lowQual field is used
|
||||
headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotypingEngine.LOW_QUAL_FILTER_NAME, "Low quality"));
|
||||
headerInfo.add(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.LOW_QUAL_FILTER_NAME));
|
||||
|
||||
return headerInfo;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -83,8 +83,6 @@ import java.util.*;
|
|||
*/
|
||||
public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCollection> {
|
||||
|
||||
public static final String PL_FOR_ALL_SNP_ALLELES_KEY = "APL";
|
||||
|
||||
private static final int SNP_MODEL = 0;
|
||||
private static final int INDEL_MODEL = 1;
|
||||
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@
|
|||
package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
|
||||
|
||||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
import htsjdk.variant.variantcontext.*;
|
||||
|
||||
|
|
@ -80,9 +81,9 @@ abstract class ExactAFCalculator extends AFCalculator {
|
|||
protected static final Comparator<LikelihoodSum> LIKELIHOOD_NON_REF_THEN_SUM_COMPARATOR = new Comparator<LikelihoodSum>() {
|
||||
@Override
|
||||
public int compare(final LikelihoodSum o1, final LikelihoodSum o2) {
|
||||
if (o1.allele == GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE)
|
||||
if (o1.allele == GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE)
|
||||
return 1;
|
||||
else if (o2.allele == GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE)
|
||||
else if (o2.allele == GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE)
|
||||
return -1;
|
||||
else
|
||||
return o1.compareTo(o2);
|
||||
|
|
@ -182,7 +183,7 @@ abstract class ExactAFCalculator extends AFCalculator {
|
|||
final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
|
||||
|
||||
final int nonRefAltAlleleIndex = GATKVariantContextUtils.indexOfAltAllele(vc,
|
||||
GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE, false);
|
||||
GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE, false);
|
||||
final boolean nonRefAltAllelePresent = nonRefAltAlleleIndex >= 0;
|
||||
|
||||
// <NON_REF> should not be considered in the downsizing, so we need to count it out when
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ import com.google.java.contract.Requires;
|
|||
import htsjdk.variant.variantcontext.*;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodCalculators;
|
||||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -195,7 +195,7 @@ import java.util.*;
|
|||
else {
|
||||
final VariantContextBuilder vcb = new VariantContextBuilder(vc);
|
||||
final Allele reference = vcb.getAlleles().get(0);
|
||||
vcb.alleles(Arrays.asList(reference, GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE));
|
||||
vcb.alleles(Arrays.asList(reference, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE));
|
||||
final int genotypeCount = GenotypeLikelihoodCalculators.genotypeCount(2, vc.getNAlleles());
|
||||
final double[] hetLikelihoods = new double[vc.getNAlleles() - 1];
|
||||
final double[] homAltLikelihoods = new double[genotypeCount - hetLikelihoods.length - 1];
|
||||
|
|
@ -213,7 +213,7 @@ import java.util.*;
|
|||
else if (oldAllele.isNoCall())
|
||||
newAlleles.add(Allele.NO_CALL);
|
||||
else
|
||||
newAlleles.add(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
newAlleles.add(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||
}
|
||||
gb.alleles(newAlleles);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -101,9 +101,7 @@ import org.broadinstitute.gatk.utils.pairhmm.PairHMM;
|
|||
import org.broadinstitute.gatk.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.gatk.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.*;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -557,9 +555,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
@Argument(fullName="doNotRunPhysicalPhasing", shortName="doNotRunPhysicalPhasing", doc="Disable physical phasing", required = false)
|
||||
protected boolean doNotRunPhysicalPhasing = false;
|
||||
|
||||
public static final String HAPLOTYPE_CALLER_PHASING_ID_KEY = "PID";
|
||||
public static final String HAPLOTYPE_CALLER_PHASING_GT_KEY = "PGT";
|
||||
|
||||
// -----------------------------------------------------------------------------------------------
|
||||
// arguments for debugging / developing the haplotype caller
|
||||
// -----------------------------------------------------------------------------------------------
|
||||
|
|
@ -812,10 +807,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
// all annotation fields from VariantAnnotatorEngine
|
||||
headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions());
|
||||
// all callers need to add these standard annotation header lines
|
||||
VCFStandardHeaderLines.addStandardInfoLines(headerInfo, true,
|
||||
VCFConstants.DOWNSAMPLED_KEY,
|
||||
VCFConstants.MLE_ALLELE_COUNT_KEY,
|
||||
VCFConstants.MLE_ALLELE_FREQUENCY_KEY);
|
||||
headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.DOWNSAMPLED_KEY));
|
||||
headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_COUNT_KEY));
|
||||
headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));
|
||||
// all callers need to add these standard FORMAT field header lines
|
||||
VCFStandardHeaderLines.addStandardFormatLines(headerInfo, true,
|
||||
VCFConstants.GENOTYPE_KEY,
|
||||
|
|
@ -824,13 +818,13 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
VCFConstants.GENOTYPE_PL_KEY);
|
||||
|
||||
if ( ! doNotRunPhysicalPhasing ) {
|
||||
headerInfo.add(new VCFFormatHeaderLine(HAPLOTYPE_CALLER_PHASING_ID_KEY, 1, VCFHeaderLineType.String, "Physical phasing ID information, where each unique ID within a given sample (but not across samples) connects records within a phasing group"));
|
||||
headerInfo.add(new VCFFormatHeaderLine(HAPLOTYPE_CALLER_PHASING_GT_KEY, 1, VCFHeaderLineType.String, "Physical phasing haplotype information, describing how the alternate alleles are phased in relation to one another"));
|
||||
headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_ID_KEY));
|
||||
headerInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY));
|
||||
}
|
||||
|
||||
// FILTER fields are added unconditionally as it's not always 100% certain the circumstances
|
||||
// where the filters are used. For example, in emitting all sites the lowQual field is used
|
||||
headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotypingEngine.LOW_QUAL_FILTER_NAME, "Low quality"));
|
||||
headerInfo.add(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.LOW_QUAL_FILTER_NAME));
|
||||
|
||||
initializeReferenceConfidenceModel(samplesList, headerInfo);
|
||||
|
||||
|
|
|
|||
|
|
@ -68,6 +68,7 @@ import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
|
|||
import org.broadinstitute.gatk.utils.haplotype.EventMap;
|
||||
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -121,7 +122,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
|
|||
|
||||
@Override
|
||||
protected boolean forceKeepAllele(final Allele allele) {
|
||||
return allele == GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE ||
|
||||
return allele == GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE ||
|
||||
configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES ||
|
||||
configuration.emitReferenceConfidence != ReferenceConfidenceMode.NONE;
|
||||
}
|
||||
|
|
@ -262,7 +263,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
|
|||
|
||||
|
||||
if (emitReferenceConfidence)
|
||||
readAlleleLikelihoods.addNonReferenceAllele(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
readAlleleLikelihoods.addNonReferenceAllele(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||
|
||||
final GenotypesContext genotypes = calculateGLsForThisEvent( readAlleleLikelihoods, mergedVC, noCallAlleles );
|
||||
final VariantContext call = calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), calculationModel);
|
||||
|
|
@ -489,7 +490,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
|
|||
* @return true if this variant context is bi-allelic, ignoring the NON-REF symbolic allele, false otherwise
|
||||
*/
|
||||
private static boolean isBiallelic(final VariantContext vc) {
|
||||
return vc.isBiallelic() || (vc.getNAlleles() == 3 && vc.getAlternateAlleles().contains(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE));
|
||||
return vc.isBiallelic() || (vc.getNAlleles() == 3 && vc.getAlternateAlleles().contains(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -514,7 +515,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
|
|||
private static VariantContext phaseVC(final VariantContext vc, final String ID, final String phaseGT) {
|
||||
final List<Genotype> phasedGenotypes = new ArrayList<>();
|
||||
for ( final Genotype g : vc.getGenotypes() )
|
||||
phasedGenotypes.add(new GenotypeBuilder(g).attribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_ID_KEY, ID).attribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_GT_KEY, phaseGT).make());
|
||||
phasedGenotypes.add(new GenotypeBuilder(g).attribute(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_ID_KEY, ID).attribute(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY, phaseGT).make());
|
||||
return new VariantContextBuilder(vc).genotypes(phasedGenotypes).make();
|
||||
}
|
||||
|
||||
|
|
@ -523,7 +524,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
|
|||
final List<Allele> originalList = mergedVC.getAlleles();
|
||||
final List<Allele> alleleList = new ArrayList<>(originalList.size() + 1);
|
||||
alleleList.addAll(mergedVC.getAlleles());
|
||||
alleleList.add(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
alleleList.add(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||
vcb.alleles(alleleList);
|
||||
return vcb.make();
|
||||
}
|
||||
|
|
@ -552,7 +553,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
|
|||
readAlleleLikelihoodsForAnnotations = readHaplotypeLikelihoods.marginalize(alleleMapper, loc);
|
||||
if (emitReferenceConfidence)
|
||||
readAlleleLikelihoodsForAnnotations.addNonReferenceAllele(
|
||||
GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||
}
|
||||
|
||||
// Skim the filtered map based on the location so that we do not add filtered reads that are going to be removed
|
||||
|
|
|
|||
|
|
@ -70,6 +70,7 @@ import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
|
|||
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileupImpl;
|
||||
import org.broadinstitute.gatk.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -138,7 +139,7 @@ public class ReferenceConfidenceModel {
|
|||
public Set<VCFHeaderLine> getVCFHeaderLines() {
|
||||
final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>();
|
||||
// TODO - do we need a new kind of VCF Header subclass for specifying arbitrary alternate alleles?
|
||||
headerLines.add(new VCFSimpleHeaderLine(ALTERNATE_ALLELE_STRING, GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE_NAME, "Represents any possible alternative allele at this location"));
|
||||
headerLines.add(new VCFSimpleHeaderLine(ALTERNATE_ALLELE_STRING, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE_NAME, "Represents any possible alternative allele at this location"));
|
||||
//headerLines.add(new VCFFormatHeaderLine(INDEL_INFORMATIVE_DEPTH, 1, VCFHeaderLineType.Integer, "Number of reads at locus that are informative about an indel of size <= " + indelInformativeDepthIndelSize));
|
||||
return headerLines;
|
||||
}
|
||||
|
|
@ -215,7 +216,7 @@ public class ReferenceConfidenceModel {
|
|||
homRefCalc.capByHomRefLikelihood();
|
||||
|
||||
final Allele refAllele = Allele.create(refBase, true);
|
||||
final List<Allele> refSiteAlleles = Arrays.asList(refAllele, GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
final List<Allele> refSiteAlleles = Arrays.asList(refAllele, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||
final VariantContextBuilder vcb = new VariantContextBuilder("HC", curPos.getContig(), curPos.getStart(), curPos.getStart(), refSiteAlleles);
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(sampleName, GATKVariantContextUtils.homozygousAlleleList(refAllele, ploidy));
|
||||
gb.AD(homRefCalc.AD_Ref_Any);
|
||||
|
|
@ -320,7 +321,7 @@ public class ReferenceConfidenceModel {
|
|||
public RefVsAnyResult calcGenotypeLikelihoodsOfRefVsAny(final String sampleName, final int ploidy,
|
||||
final GenotypingModel genotypingModel,
|
||||
final ReadBackedPileup pileup, final byte refBase, final byte minBaseQual, final MathUtils.RunningAverage hqSoftClips) {
|
||||
final AlleleList<Allele> alleleList = new IndexedAlleleList<>(Allele.create(refBase,true),GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
final AlleleList<Allele> alleleList = new IndexedAlleleList<>(Allele.create(refBase,true), GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||
// Notice that the sample name is rather irrelevant as this information is never used; it just needs to be the same in both lines below.
|
||||
|
||||
final int maximumReadCount = pileup.getReads().size();
|
||||
|
|
|
|||
|
|
@ -65,6 +65,8 @@ import org.broadinstitute.gatk.utils.QualityUtils;
|
|||
import org.broadinstitute.gatk.engine.SampleUtils;
|
||||
import org.broadinstitute.gatk.utils.help.HelpConstants;
|
||||
import org.broadinstitute.gatk.engine.GATKVCFUtils;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
import htsjdk.variant.vcf.*;
|
||||
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
|
||||
|
|
@ -146,7 +148,6 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
@Output
|
||||
protected VariantContextWriter vcfWriter = null;
|
||||
|
||||
private final String TRANSMISSION_PROBABILITY_TAG_NAME = "TP";
|
||||
private final String SOURCE_NAME = "PhaseByTransmission";
|
||||
|
||||
public final double NO_TRANSMISSION_PROB = -1.0;
|
||||
|
|
@ -414,7 +415,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
Map<String, Object> genotypeAttributes = new HashMap<String, Object>();
|
||||
genotypeAttributes.putAll(genotype.getExtendedAttributes());
|
||||
if(transmissionProb>NO_TRANSMISSION_PROB)
|
||||
genotypeAttributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, phredScoreTransmission);
|
||||
genotypeAttributes.put(GATKVCFConstants.TRANSMISSION_PROBABILITY_KEY, phredScoreTransmission);
|
||||
|
||||
ArrayList<Allele> phasedAlleles = new ArrayList<Allele>(2);
|
||||
for(Allele allele : phasedGenotype.getAlleles()){
|
||||
|
|
@ -461,7 +462,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
|
||||
Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>();
|
||||
headerLines.addAll(GATKVCFUtils.getHeaderFields(this.getToolkit()));
|
||||
headerLines.add(new VCFFormatHeaderLine(TRANSMISSION_PROBABILITY_TAG_NAME, 1, VCFHeaderLineType.Integer, "Phred score of the genotype combination and phase given that the genotypes are correct"));
|
||||
headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.TRANSMISSION_PROBABILITY_KEY));
|
||||
headerLines.add(new VCFHeaderLine("source", SOURCE_NAME));
|
||||
vcfWriter.writeHeader(new VCFHeader(headerLines, vcfSamples));
|
||||
|
||||
|
|
@ -879,7 +880,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
updateTrioMetricsCounters(phasedMother,phasedFather,phasedChild,mvCount,metricsCounters);
|
||||
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s",
|
||||
vc.getChr(),vc.getStart(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.getFamilyID(),
|
||||
phasedMother.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),printAD(phasedMother.getAD()),
|
||||
phasedMother.getExtendedAttribute(GATKVCFConstants.TRANSMISSION_PROBABILITY_KEY),phasedMother.getGenotypeString(),phasedMother.getDP(),printAD(phasedMother.getAD()),
|
||||
phasedMother.getLikelihoodsString(), phasedFather.getGenotypeString(),phasedFather.getDP(),printAD(phasedFather.getAD()),phasedFather.getLikelihoodsString(),
|
||||
phasedChild.getGenotypeString(),phasedChild.getDP(),printAD(phasedChild.getAD()),phasedChild.getLikelihoodsString());
|
||||
if(!(phasedMother.getType()==mother.getType() && phasedFather.getType()==father.getType() && phasedChild.getType()==child.getType()))
|
||||
|
|
@ -891,7 +892,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1);
|
||||
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s:%s:%s:%s\t.\t.\t.\t.\t%s\t%s\t%s\t%s",
|
||||
vc.getChr(),vc.getStart(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.getFamilyID(),
|
||||
phasedMother.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),printAD(phasedMother.getAD()),phasedMother.getLikelihoodsString(),
|
||||
phasedMother.getExtendedAttribute(GATKVCFConstants.TRANSMISSION_PROBABILITY_KEY),phasedMother.getGenotypeString(),phasedMother.getDP(),printAD(phasedMother.getAD()),phasedMother.getLikelihoodsString(),
|
||||
phasedChild.getGenotypeString(),phasedChild.getDP(),printAD(phasedChild.getAD()),phasedChild.getLikelihoodsString());
|
||||
}
|
||||
}
|
||||
|
|
@ -902,7 +903,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1);
|
||||
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t.\t.\t.\t.\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s",
|
||||
vc.getChr(),vc.getStart(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.getFamilyID(),
|
||||
phasedFather.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedFather.getGenotypeString(),phasedFather.getDP(),printAD(phasedFather.getAD()),phasedFather.getLikelihoodsString(),
|
||||
phasedFather.getExtendedAttribute(GATKVCFConstants.TRANSMISSION_PROBABILITY_KEY),phasedFather.getGenotypeString(),phasedFather.getDP(),printAD(phasedFather.getAD()),phasedFather.getLikelihoodsString(),
|
||||
phasedChild.getGenotypeString(),phasedChild.getDP(),printAD(phasedChild.getAD()),phasedChild.getLikelihoodsString());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -56,6 +56,7 @@ import htsjdk.samtools.util.StringUtil;
|
|||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||
import org.broadinstitute.gatk.utils.Utils;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
import htsjdk.variant.vcf.VCFConstants;
|
||||
import htsjdk.variant.variantcontext.*;
|
||||
|
|
@ -116,11 +117,9 @@ class PhasingUtils {
|
|||
// locations of the same HP attribute in gt1 to gt2
|
||||
final int[] site1ToSite2Inds = new int[numAlleles];
|
||||
|
||||
// If both genotypes have read-backed phasing haplotype identifiers (HP)
|
||||
// Find the gt1 and gt2 alleles with the same haplotype
|
||||
if (gt1.hasAnyAttribute(ReadBackedPhasing.HP_KEY) && gt2.hasAnyAttribute(ReadBackedPhasing.HP_KEY)) {
|
||||
final String[] hp1 = (String[]) gt1.getAnyAttribute(ReadBackedPhasing.HP_KEY);
|
||||
final String[] hp2 = (String[]) gt2.getAnyAttribute(ReadBackedPhasing.HP_KEY);
|
||||
if (gt1.hasAnyAttribute(GATKVCFConstants.RBP_HAPLOTYPE_KEY) && gt2.hasAnyAttribute(GATKVCFConstants.RBP_HAPLOTYPE_KEY)) {
|
||||
final String[] hp1 = (String[]) gt1.getAnyAttribute(GATKVCFConstants.RBP_HAPLOTYPE_KEY);
|
||||
final String[] hp2 = (String[]) gt2.getAnyAttribute(GATKVCFConstants.RBP_HAPLOTYPE_KEY);
|
||||
|
||||
// Map of HP attribute to its array index
|
||||
final HashMap<String, Integer> hpNameToSite1Inds = new HashMap<String, Integer>();
|
||||
|
|
@ -204,21 +203,20 @@ class PhasingUtils {
|
|||
|
||||
// get the min read backed phasing quality
|
||||
double PQ = Double.MAX_VALUE;
|
||||
if (gt1.hasAnyAttribute(ReadBackedPhasing.PQ_KEY)) {
|
||||
PQ = Math.min(PQ, (double) gt1.getAnyAttribute(ReadBackedPhasing.PQ_KEY));
|
||||
if (gt1.hasAnyAttribute(VCFConstants.PHASE_QUALITY_KEY)) {
|
||||
PQ = Math.min(PQ, (double) gt1.getAnyAttribute(VCFConstants.PHASE_QUALITY_KEY));
|
||||
}
|
||||
if (gt2.hasAnyAttribute(ReadBackedPhasing.PQ_KEY)) {
|
||||
PQ = Math.min(PQ, (double) gt2.getAnyAttribute(ReadBackedPhasing.PQ_KEY));
|
||||
if (gt2.hasAnyAttribute(VCFConstants.PHASE_QUALITY_KEY)) {
|
||||
PQ = Math.min(PQ, (double) gt2.getAnyAttribute(VCFConstants.PHASE_QUALITY_KEY));
|
||||
}
|
||||
if (PQ != Double.MAX_VALUE)
|
||||
mergedGtAttribs.put(ReadBackedPhasing.PQ_KEY, PQ);
|
||||
mergedGtAttribs.put(VCFConstants.PHASE_QUALITY_KEY, PQ);
|
||||
|
||||
// get the read backed phasing haplotype identifier
|
||||
if (gt1.hasAnyAttribute(ReadBackedPhasing.HP_KEY)) {
|
||||
mergedGtAttribs.put(ReadBackedPhasing.HP_KEY, gt1.getAnyAttribute(ReadBackedPhasing.HP_KEY));
|
||||
if (gt1.hasAnyAttribute(GATKVCFConstants.RBP_HAPLOTYPE_KEY)) {
|
||||
mergedGtAttribs.put(GATKVCFConstants.RBP_HAPLOTYPE_KEY, gt1.getAnyAttribute(GATKVCFConstants.RBP_HAPLOTYPE_KEY));
|
||||
}
|
||||
else if (gt2.hasAnyAttribute(ReadBackedPhasing.HP_KEY)) { // gt1 doesn't have, but merged (so gt1 is hom and can take gt2's haplotype names):
|
||||
mergedGtAttribs.put(ReadBackedPhasing.HP_KEY, gt2.getAnyAttribute(ReadBackedPhasing.HP_KEY));
|
||||
else if (gt2.hasAnyAttribute(GATKVCFConstants.RBP_HAPLOTYPE_KEY)) { // gt1 doesn't have, but merged (so gt1 is hom and can take gt2's haplotype names):
|
||||
mergedGtAttribs.put(GATKVCFConstants.RBP_HAPLOTYPE_KEY, gt2.getAnyAttribute(GATKVCFConstants.RBP_HAPLOTYPE_KEY));
|
||||
}
|
||||
|
||||
// make the merged genotype
|
||||
|
|
@ -378,12 +376,12 @@ class PhasingUtils {
|
|||
return true;
|
||||
|
||||
// If gt1 or gt2 do not have a read backed phasing haplotype, then can not be merged
|
||||
if (!gt1.hasAnyAttribute(ReadBackedPhasing.HP_KEY) || !gt2.hasAnyAttribute(ReadBackedPhasing.HP_KEY))
|
||||
if (!gt1.hasAnyAttribute(GATKVCFConstants.RBP_HAPLOTYPE_KEY) || !gt2.hasAnyAttribute(GATKVCFConstants.RBP_HAPLOTYPE_KEY))
|
||||
return false;
|
||||
|
||||
// If gt1 or gt2 do not have the same number of HP attributes as chromosomes, then they cannot be merged.
|
||||
final String[] hp1 = (String[]) gt1.getAnyAttribute(ReadBackedPhasing.HP_KEY);
|
||||
final String[] hp2 = (String[]) gt2.getAnyAttribute(ReadBackedPhasing.HP_KEY);
|
||||
final String[] hp1 = (String[]) gt1.getAnyAttribute(GATKVCFConstants.RBP_HAPLOTYPE_KEY);
|
||||
final String[] hp2 = (String[]) gt2.getAnyAttribute(GATKVCFConstants.RBP_HAPLOTYPE_KEY);
|
||||
if (hp1.length != gt1.getPloidy() || hp2.length != gt2.getPloidy())
|
||||
return false;
|
||||
|
||||
|
|
|
|||
|
|
@ -65,6 +65,8 @@ import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.gatk.utils.help.HelpConstants;
|
||||
import org.broadinstitute.gatk.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.gatk.engine.GATKVCFUtils;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.gatk.utils.BaseUtils;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
|
|
@ -179,15 +181,10 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
|
|||
|
||||
private static PreciseNonNegativeDouble ZERO = new PreciseNonNegativeDouble(0.0);
|
||||
|
||||
public static final String PQ_KEY = "PQ";
|
||||
public static final String HP_KEY = "HP";
|
||||
|
||||
// In order to detect phase inconsistencies:
|
||||
private static final double FRACTION_OF_MEAN_PQ_CHANGES = 0.1; // If the PQ decreases by this fraction of the mean PQ changes (thus far), then this read is inconsistent with previous reads
|
||||
private static final double MAX_FRACTION_OF_INCONSISTENT_READS = 0.1; // If there are more than this fraction of inconsistent reads, then flag this site
|
||||
|
||||
public static final String PHASING_INCONSISTENT_KEY = "PhasingInconsistent";
|
||||
|
||||
@Argument(fullName = "enableMergePhasedSegregatingPolymorphismsToMNP", shortName = "enableMergeToMNP", doc = "Merge consecutive phased sites into MNP records", required = false)
|
||||
protected boolean enableMergePhasedSegregatingPolymorphismsToMNP = false;
|
||||
|
||||
|
|
@ -248,9 +245,9 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
|
|||
hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
|
||||
|
||||
// Phasing-specific FORMAT and INFO fields:
|
||||
hInfo.add(new VCFFormatHeaderLine(PQ_KEY, 1, VCFHeaderLineType.Float, "Read-backed phasing quality"));
|
||||
hInfo.add(new VCFFormatHeaderLine(HP_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Read-backed phasing haplotype identifiers"));
|
||||
hInfo.add(new VCFInfoHeaderLine(PHASING_INCONSISTENT_KEY, 0, VCFHeaderLineType.Flag, "Are the reads significantly haplotype-inconsistent?"));
|
||||
hInfo.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.PHASE_QUALITY_KEY, true));
|
||||
hInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.RBP_HAPLOTYPE_KEY));
|
||||
hInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.RBP_INCONSISTENT_KEY));
|
||||
|
||||
// todo -- fix samplesToPhase
|
||||
String trackName = variantCollection.variants.getName();
|
||||
|
|
@ -320,7 +317,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
|
|||
return new PhasingStatsAndOutput(phaseStats, completedList);
|
||||
}
|
||||
|
||||
private static final Set<String> KEYS_TO_KEEP_IN_REDUCED_VCF = new HashSet<String>(Arrays.asList(PQ_KEY));
|
||||
private static final Set<String> KEYS_TO_KEEP_IN_REDUCED_VCF = new HashSet<>(Arrays.asList(VCFConstants.PHASE_QUALITY_KEY));
|
||||
|
||||
private VariantContext reduceVCToSamples(VariantContext vc, Set<String> samplesToPhase) {
|
||||
// for ( String sample : samplesToPhase )
|
||||
|
|
@ -445,15 +442,15 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
|
|||
if (phasedCurGenotypeRelativeToPrevious) {
|
||||
Genotype prevHetGenotype = phaseWindow.phaseRelativeToGenotype();
|
||||
SNPallelePair prevAllelePair = new SNPallelePair(prevHetGenotype);
|
||||
if (!prevHetGenotype.hasAnyAttribute(HP_KEY))
|
||||
if (!prevHetGenotype.hasAnyAttribute(GATKVCFConstants.RBP_HAPLOTYPE_KEY))
|
||||
throw new ReviewedGATKException("Internal error: missing haplotype markings for previous genotype, even though we put it there...");
|
||||
String[] prevPairNames = (String[]) prevHetGenotype.getAnyAttribute(HP_KEY);
|
||||
String[] prevPairNames = (String[]) prevHetGenotype.getAnyAttribute(GATKVCFConstants.RBP_HAPLOTYPE_KEY);
|
||||
|
||||
String[] curPairNames = ensurePhasing(allelePair, prevAllelePair, prevPairNames, pr.haplotype);
|
||||
Genotype phasedGt = new GenotypeBuilder(gt)
|
||||
.alleles(allelePair.getAllelesAsList())
|
||||
.attribute(PQ_KEY, pr.phaseQuality)
|
||||
.attribute(HP_KEY, curPairNames)
|
||||
.attribute(VCFConstants.PHASE_QUALITY_KEY, pr.phaseQuality)
|
||||
.attribute(GATKVCFConstants.RBP_HAPLOTYPE_KEY, curPairNames)
|
||||
.make();
|
||||
uvc.setGenotype(samp, phasedGt);
|
||||
|
||||
|
|
@ -506,7 +503,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
|
|||
String locStr = Integer.toString(GATKVariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), vc).getStart());
|
||||
|
||||
Genotype startNewHaplotypeGt = new GenotypeBuilder(gt)
|
||||
.attribute(HP_KEY, new String[]{locStr + "-1", locStr + "-2"})
|
||||
.attribute(GATKVCFConstants.RBP_HAPLOTYPE_KEY, new String[]{locStr + "-1", locStr + "-2"})
|
||||
.make();
|
||||
|
||||
uvc.setGenotype(samp, startNewHaplotypeGt);
|
||||
|
|
@ -1296,7 +1293,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
|
|||
}
|
||||
|
||||
public void setPhasingInconsistent() {
|
||||
attributes.put(PHASING_INCONSISTENT_KEY, true);
|
||||
attributes.put(GATKVCFConstants.RBP_INCONSISTENT_KEY, true);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -64,6 +64,8 @@ import org.broadinstitute.gatk.tools.walkers.genotyper.*;
|
|||
import org.broadinstitute.gatk.engine.SampleUtils;
|
||||
import org.broadinstitute.gatk.utils.help.HelpConstants;
|
||||
import org.broadinstitute.gatk.engine.GATKVCFUtils;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
|
|
@ -333,7 +335,7 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
|
|||
samples = SampleUtils.getSampleList(header, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
|
||||
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(header.values(), true);
|
||||
headerLines.add(new VCFHeaderLine("source", "GenotypeAndValidate"));
|
||||
headerLines.add(new VCFInfoHeaderLine("callStatus", 1, VCFHeaderLineType.String, "Value from the validation VCF"));
|
||||
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.GENOTYPE_AND_VALIDATE_STATUS_KEY));
|
||||
vcfWriter.writeHeader(new VCFHeader(headerLines, samples));
|
||||
}
|
||||
|
||||
|
|
@ -496,8 +498,8 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
|
|||
}
|
||||
|
||||
if (vcfWriter != null && writeVariant) {
|
||||
if (!vcComp.hasAttribute("callStatus")) {
|
||||
vcfWriter.add(new VariantContextBuilder(vcComp).attribute("callStatus", call.isCalledAlt(callConf) ? "ALT" : "REF").make());
|
||||
if (!vcComp.hasAttribute(GATKVCFConstants.GENOTYPE_AND_VALIDATE_STATUS_KEY)) {
|
||||
vcfWriter.add(new VariantContextBuilder(vcComp).attribute(GATKVCFConstants.GENOTYPE_AND_VALIDATE_STATUS_KEY, call.isCalledAlt(callConf) ? "ALT" : "REF").make());
|
||||
}
|
||||
else
|
||||
vcfWriter.add(vcComp);
|
||||
|
|
|
|||
|
|
@ -69,6 +69,8 @@ import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
|
|||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import htsjdk.variant.variantcontext.VariantContextBuilder;
|
||||
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
|
@ -255,10 +257,10 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
|
|||
|
||||
public static void addVQSRStandardHeaderLines(final Set<VCFHeaderLine> hInfo) {
|
||||
hInfo.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY));
|
||||
hInfo.add(new VCFInfoHeaderLine(VariantRecalibrator.VQS_LOD_KEY, 1, VCFHeaderLineType.Float, "Log odds ratio of being a true variant versus being false under the trained gaussian mixture model"));
|
||||
hInfo.add(new VCFInfoHeaderLine(VariantRecalibrator.CULPRIT_KEY, 1, VCFHeaderLineType.String, "The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out"));
|
||||
hInfo.add(new VCFInfoHeaderLine(VariantRecalibrator.POSITIVE_LABEL_KEY, 1, VCFHeaderLineType.Flag, "This variant was used to build the positive training set of good variants"));
|
||||
hInfo.add(new VCFInfoHeaderLine(VariantRecalibrator.NEGATIVE_LABEL_KEY, 1, VCFHeaderLineType.Flag, "This variant was used to build the negative training set of bad variants"));
|
||||
hInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.VQS_LOD_KEY));
|
||||
hInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.CULPRIT_KEY));
|
||||
hInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.POSITIVE_LABEL_KEY));
|
||||
hInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.NEGATIVE_LABEL_KEY));
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -285,7 +287,7 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
|
|||
throw new UserException("Encountered input variant which isn't found in the input recal file. Please make sure VariantRecalibrator and ApplyRecalibration were run on the same set of input variants. First seen at: " + vc );
|
||||
}
|
||||
|
||||
final String lodString = recalDatum.getAttributeAsString(VariantRecalibrator.VQS_LOD_KEY, null);
|
||||
final String lodString = recalDatum.getAttributeAsString(GATKVCFConstants.VQS_LOD_KEY, null);
|
||||
if( lodString == null ) {
|
||||
throw new UserException("Encountered a malformed record in the input recal file. There is no lod for the record at: " + vc );
|
||||
}
|
||||
|
|
@ -299,12 +301,12 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
|
|||
VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||
|
||||
// Annotate the new record with its VQSLOD and the worst performing annotation
|
||||
builder.attribute(VariantRecalibrator.VQS_LOD_KEY, lod);
|
||||
builder.attribute(VariantRecalibrator.CULPRIT_KEY, recalDatum.getAttribute(VariantRecalibrator.CULPRIT_KEY));
|
||||
if ( recalDatum.hasAttribute(VariantRecalibrator.POSITIVE_LABEL_KEY))
|
||||
builder.attribute(VariantRecalibrator.POSITIVE_LABEL_KEY, true);
|
||||
if ( recalDatum.hasAttribute(VariantRecalibrator.NEGATIVE_LABEL_KEY))
|
||||
builder.attribute(VariantRecalibrator.NEGATIVE_LABEL_KEY, true);
|
||||
builder.attribute(GATKVCFConstants.VQS_LOD_KEY, lod);
|
||||
builder.attribute(GATKVCFConstants.CULPRIT_KEY, recalDatum.getAttribute(GATKVCFConstants.CULPRIT_KEY));
|
||||
if ( recalDatum.hasAttribute(GATKVCFConstants.POSITIVE_LABEL_KEY))
|
||||
builder.attribute(GATKVCFConstants.POSITIVE_LABEL_KEY, true);
|
||||
if ( recalDatum.hasAttribute(GATKVCFConstants.NEGATIVE_LABEL_KEY))
|
||||
builder.attribute(GATKVCFConstants.NEGATIVE_LABEL_KEY, true);
|
||||
|
||||
final String filterString = generateFilterString(lod);
|
||||
|
||||
|
|
|
|||
|
|
@ -66,6 +66,7 @@ import org.broadinstitute.gatk.utils.exceptions.UserException;
|
|||
import htsjdk.variant.variantcontext.Allele;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import htsjdk.variant.variantcontext.VariantContextBuilder;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -428,11 +429,11 @@ public class VariantDataManager {
|
|||
for( final VariantDatum datum : data ) {
|
||||
VariantContextBuilder builder = new VariantContextBuilder("VQSR", datum.loc.getContig(), datum.loc.getStart(), datum.loc.getStop(), alleles);
|
||||
builder.attribute(VCFConstants.END_KEY, datum.loc.getStop());
|
||||
builder.attribute(VariantRecalibrator.VQS_LOD_KEY, String.format("%.4f", datum.lod));
|
||||
builder.attribute(VariantRecalibrator.CULPRIT_KEY, (datum.worstAnnotation != -1 ? annotationKeys.get(datum.worstAnnotation) : "NULL"));
|
||||
builder.attribute(GATKVCFConstants.VQS_LOD_KEY, String.format("%.4f", datum.lod));
|
||||
builder.attribute(GATKVCFConstants.CULPRIT_KEY, (datum.worstAnnotation != -1 ? annotationKeys.get(datum.worstAnnotation) : "NULL"));
|
||||
|
||||
if ( datum.atTrainingSite ) builder.attribute(VariantRecalibrator.POSITIVE_LABEL_KEY, true);
|
||||
if ( datum.atAntiTrainingSite ) builder.attribute(VariantRecalibrator.NEGATIVE_LABEL_KEY, true);
|
||||
if ( datum.atTrainingSite ) builder.attribute(GATKVCFConstants.POSITIVE_LABEL_KEY, true);
|
||||
if ( datum.atAntiTrainingSite ) builder.attribute(GATKVCFConstants.NEGATIVE_LABEL_KEY, true);
|
||||
|
||||
recalWriter.add(builder.make());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -156,10 +156,6 @@ import java.util.*;
|
|||
@PartitionBy(PartitionType.NONE)
|
||||
public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDatum>, ExpandingArrayList<VariantDatum>> implements TreeReducible<ExpandingArrayList<VariantDatum>> {
|
||||
|
||||
public static final String VQS_LOD_KEY = "VQSLOD"; // Log odds ratio of being a true variant versus being false under the trained gaussian mixture model
|
||||
public static final String CULPRIT_KEY = "culprit"; // The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out
|
||||
public static final String NEGATIVE_LABEL_KEY = "NEGATIVE_TRAIN_SITE"; // this variant was used in the negative training set
|
||||
public static final String POSITIVE_LABEL_KEY = "POSITIVE_TRAIN_SITE"; // this variant was used in the positive training set
|
||||
private static final String PLOT_TRANCHES_RSCRIPT = "plot_Tranches.R";
|
||||
|
||||
@ArgumentCollection private VariantRecalibratorArgumentCollection VRAC = new VariantRecalibratorArgumentCollection();
|
||||
|
|
|
|||
|
|
@ -66,6 +66,8 @@ import org.broadinstitute.gatk.utils.exceptions.UserException;
|
|||
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.gatk.utils.help.HelpConstants;
|
||||
import org.broadinstitute.gatk.engine.GATKVCFUtils;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
|
||||
import htsjdk.variant.variantcontext.*;
|
||||
|
|
@ -195,7 +197,7 @@ public class CalculateGenotypePosteriors extends RodWalker<Integer,Integer> {
|
|||
* be used to inform the frequency distribution underlying the genotype priors.
|
||||
*/
|
||||
@Input(fullName="supporting", shortName = "supporting", doc="Other callsets to use in generating genotype posteriors", required=false)
|
||||
public List<RodBinding<VariantContext>> supportVariants = new ArrayList<RodBinding<VariantContext>>();
|
||||
public List<RodBinding<VariantContext>> supportVariants = new ArrayList<>();
|
||||
|
||||
/**
|
||||
* The global prior of a variant site -- i.e. the expected allele frequency distribution knowing only that N alleles
|
||||
|
|
@ -262,10 +264,6 @@ public class CalculateGenotypePosteriors extends RodWalker<Integer,Integer> {
|
|||
@Output(doc="File to which variants should be written")
|
||||
protected VariantContextWriter vcfWriter = null;
|
||||
|
||||
private final String JOINT_LIKELIHOOD_TAG_NAME = "JL";
|
||||
private final String JOINT_POSTERIOR_TAG_NAME = "JP";
|
||||
private final String PHRED_SCALED_POSTERIORS_KEY = "PP";
|
||||
|
||||
private FamilyLikelihoodsUtils famUtils = new FamilyLikelihoodsUtils();
|
||||
|
||||
public void initialize() {
|
||||
|
|
@ -294,8 +292,8 @@ public class CalculateGenotypePosteriors extends RodWalker<Integer,Integer> {
|
|||
throw new UserException("VCF has no genotypes");
|
||||
}
|
||||
|
||||
if ( header.hasInfoLine(VCFConstants.MLE_ALLELE_COUNT_KEY) ) {
|
||||
final VCFInfoHeaderLine mleLine = header.getInfoHeaderLine(VCFConstants.MLE_ALLELE_COUNT_KEY);
|
||||
if ( header.hasInfoLine(GATKVCFConstants.MLE_ALLELE_COUNT_KEY) ) {
|
||||
final VCFInfoHeaderLine mleLine = header.getInfoHeaderLine(GATKVCFConstants.MLE_ALLELE_COUNT_KEY);
|
||||
if ( mleLine.getCountType() != VCFHeaderLineCount.A ) {
|
||||
throw new UserException("VCF does not have a properly formatted MLEAC field: the count type should be \"A\"");
|
||||
}
|
||||
|
|
@ -307,11 +305,11 @@ public class CalculateGenotypePosteriors extends RodWalker<Integer,Integer> {
|
|||
|
||||
// Initialize VCF header
|
||||
final Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), true);
|
||||
headerLines.add(new VCFFormatHeaderLine(PHRED_SCALED_POSTERIORS_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Phred-scaled Posterior Genotype Probabilities"));
|
||||
headerLines.add(new VCFInfoHeaderLine("PG", VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Genotype Likelihood Prior"));
|
||||
headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY));
|
||||
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.GENOTYPE_PRIOR_KEY));
|
||||
if (!skipFamilyPriors) {
|
||||
headerLines.add(new VCFFormatHeaderLine(JOINT_LIKELIHOOD_TAG_NAME, 1, VCFHeaderLineType.Integer, "Phred-scaled joint likelihood of the genotype combination (before applying family priors)"));
|
||||
headerLines.add(new VCFFormatHeaderLine(JOINT_POSTERIOR_TAG_NAME, 1, VCFHeaderLineType.Integer, "Phred-scaled joint posterior probability of the genotype combination (after applying family priors)"));
|
||||
headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.JOINT_LIKELIHOOD_TAG_NAME));
|
||||
headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.JOINT_POSTERIOR_TAG_NAME));
|
||||
}
|
||||
headerLines.add(new VCFHeaderLine("source", "CalculateGenotypePosteriors"));
|
||||
|
||||
|
|
|
|||
|
|
@ -65,6 +65,7 @@ import org.broadinstitute.gatk.engine.SampleUtils;
|
|||
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.gatk.utils.help.HelpConstants;
|
||||
import org.broadinstitute.gatk.engine.GATKVCFUtils;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
import htsjdk.variant.variantcontext.*;
|
||||
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
|
||||
|
|
@ -338,7 +339,7 @@ public class CombineGVCFs extends RodWalker<CombineGVCFs.PositionalState, Combin
|
|||
genotypes.add(new GenotypeBuilder(g).alleles(GATKVariantContextUtils.noCallAlleles(g.getPloidy())).make());
|
||||
}
|
||||
|
||||
return new VariantContextBuilder("", first.getChr(), start, end, Arrays.asList(refAllele, GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE)).attributes(attrs).genotypes(genotypes).make();
|
||||
return new VariantContextBuilder("", first.getChr(), start, end, Arrays.asList(refAllele, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE)).attributes(attrs).genotypes(genotypes).make();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ import org.broadinstitute.gatk.utils.MathUtils;
|
|||
import org.broadinstitute.gatk.utils.QualityUtils;
|
||||
import org.broadinstitute.gatk.utils.Utils;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
import htsjdk.variant.utils.GeneralUtils;
|
||||
import htsjdk.variant.variantcontext.*;
|
||||
|
|
@ -73,17 +74,13 @@ public class FamilyLikelihoodsUtils {
|
|||
|
||||
//Matrix of priors for all genotype combinations
|
||||
final private EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>> mvCountMatrix =
|
||||
new EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>>(GenotypeType.class);
|
||||
new EnumMap<>(GenotypeType.class);
|
||||
|
||||
final int NUM_CALLED_GENOTYPETYPES = 3; //HOM_REF, HET, and HOM_VAR
|
||||
|
||||
double[] configurationLikelihoodsMatrix = new double[NUM_CALLED_GENOTYPETYPES*NUM_CALLED_GENOTYPETYPES*NUM_CALLED_GENOTYPETYPES];
|
||||
|
||||
ArrayList<Sample> trios = new ArrayList<Sample>();
|
||||
|
||||
private final String JOINT_LIKELIHOOD_TAG_NAME = "JL";
|
||||
private final String JOINT_POSTERIOR_TAG_NAME = "JP";
|
||||
private final String PHRED_SCALED_POSTERIORS_KEY = "PP";
|
||||
ArrayList<Sample> trios = new ArrayList<>();
|
||||
|
||||
public final double NO_JOINT_VALUE = -1.0;
|
||||
|
||||
|
|
@ -158,10 +155,10 @@ public class FamilyLikelihoodsUtils {
|
|||
}
|
||||
|
||||
//Add the joint trio calculations
|
||||
final Map<String, Object> genotypeAttributes = new HashMap<String, Object>();
|
||||
final Map<String, Object> genotypeAttributes = new HashMap<>();
|
||||
genotypeAttributes.putAll(genotype.getExtendedAttributes());
|
||||
genotypeAttributes.put(JOINT_LIKELIHOOD_TAG_NAME, phredScaledJL);
|
||||
genotypeAttributes.put(JOINT_POSTERIOR_TAG_NAME, phredScaledJP);
|
||||
genotypeAttributes.put(GATKVCFConstants.JOINT_LIKELIHOOD_TAG_NAME, phredScaledJL);
|
||||
genotypeAttributes.put(GATKVCFConstants.JOINT_POSTERIOR_TAG_NAME, phredScaledJP);
|
||||
|
||||
final GenotypeBuilder builder = new GenotypeBuilder(genotype);
|
||||
|
||||
|
|
@ -171,7 +168,7 @@ public class FamilyLikelihoodsUtils {
|
|||
GATKVariantContextUtils.updateGenotypeAfterSubsetting(vc.getAlleles(), builder,
|
||||
GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, log10Posteriors, vc.getAlleles());
|
||||
|
||||
builder.attribute(PHRED_SCALED_POSTERIORS_KEY,
|
||||
builder.attribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY,
|
||||
Utils.listFromPrimitives(GenotypeLikelihoods.fromLog10Likelihoods(log10Posteriors).getAsPLs()));
|
||||
builder.attributes(genotypeAttributes);
|
||||
return builder.make();
|
||||
|
|
@ -231,7 +228,7 @@ public class FamilyLikelihoodsUtils {
|
|||
}
|
||||
break;
|
||||
default:
|
||||
throw new UserException(String.format("%d does not indicate a valid trio FamilyMember -- use 0 for mother, 1 for father, 2 for child",recalcInd));
|
||||
throw new UserException(String.format("%d does not indicate a valid trio FamilyMember -- use 0 for mother, 1 for father, 2 for child",recalcInd.ordinal()));
|
||||
}
|
||||
|
||||
recalcPosteriors[0] = MathUtils.log10sumLog10(marginalOverChangedHR,0);
|
||||
|
|
@ -262,7 +259,7 @@ public class FamilyLikelihoodsUtils {
|
|||
continue;
|
||||
}
|
||||
|
||||
final ArrayList<Genotype> trioGenotypes = new ArrayList<Genotype>(3);
|
||||
final ArrayList<Genotype> trioGenotypes = new ArrayList<>(3);
|
||||
updateFamilyGenotypes(vc, mother, father, child, trioGenotypes);
|
||||
|
||||
//replace uses sample names to match genotypes, so order doesn't matter
|
||||
|
|
@ -282,12 +279,12 @@ public class FamilyLikelihoodsUtils {
|
|||
private ArrayList<Sample> setTrios(Set<String> vcfSamples, Map<String,Set<Sample>> families){
|
||||
Set<Sample> family;
|
||||
ArrayList<Sample> parents;
|
||||
final ArrayList<Sample> trios = new ArrayList<Sample>();
|
||||
final ArrayList<Sample> trios = new ArrayList<>();
|
||||
for(final Map.Entry<String,Set<Sample>> familyEntry : families.entrySet()){
|
||||
family = familyEntry.getValue();
|
||||
|
||||
// Since getFamilies(vcfSamples) above still returns parents of samples in the VCF even if those parents are not in the VCF, need to subset down here:
|
||||
final Set<Sample> familyMembersInVCF = new TreeSet<Sample>();
|
||||
final Set<Sample> familyMembersInVCF = new TreeSet<>();
|
||||
for(final Sample familyMember : family){
|
||||
if (vcfSamples.contains(familyMember.getID())) {
|
||||
familyMembersInVCF.add(familyMember);
|
||||
|
|
@ -331,7 +328,7 @@ public class FamilyLikelihoodsUtils {
|
|||
if(child == GenotypeType.NO_CALL || child == GenotypeType.UNAVAILABLE)
|
||||
return 0;
|
||||
//Add parents with genotypes for the evaluation
|
||||
final ArrayList<GenotypeType> parents = new ArrayList<GenotypeType>();
|
||||
final ArrayList<GenotypeType> parents = new ArrayList<>();
|
||||
if (!(mother == GenotypeType.NO_CALL || mother == GenotypeType.UNAVAILABLE))
|
||||
parents.add(mother);
|
||||
if (!(father == GenotypeType.NO_CALL || father == GenotypeType.UNAVAILABLE))
|
||||
|
|
@ -426,11 +423,11 @@ public class FamilyLikelihoodsUtils {
|
|||
|
||||
//Get a Map of genotype (log10)likelihoods
|
||||
private EnumMap<GenotypeType,Double> getLikelihoodsAsMapSafeNull(Genotype genotype){
|
||||
final EnumMap<GenotypeType,Double> likelihoodsMap = new EnumMap<GenotypeType, Double>(GenotypeType.class);
|
||||
final EnumMap<GenotypeType,Double> likelihoodsMap = new EnumMap<>(GenotypeType.class);
|
||||
double[] likelihoods;
|
||||
|
||||
if (genotype != null && hasCalledGT(genotype.getType()) && genotype.hasExtendedAttribute(PHRED_SCALED_POSTERIORS_KEY)) {
|
||||
Object GPfromVCF = genotype.getExtendedAttribute(PHRED_SCALED_POSTERIORS_KEY);
|
||||
if (genotype != null && hasCalledGT(genotype.getType()) && genotype.hasExtendedAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY)) {
|
||||
Object GPfromVCF = genotype.getExtendedAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY);
|
||||
//parse the GPs into a vector of probabilities
|
||||
final String[] likelihoodsAsStringVector = ((String)GPfromVCF).split(",");
|
||||
final double[] likelihoodsAsVector = new double[likelihoodsAsStringVector.length];
|
||||
|
|
|
|||
|
|
@ -72,13 +72,14 @@ import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
|
|||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.*;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.GeneralPloidyFailOverAFCalculatorProvider;
|
||||
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.HaplotypeCaller;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
import org.broadinstitute.gatk.engine.SampleUtils;
|
||||
import org.broadinstitute.gatk.utils.commandline.*;
|
||||
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.gatk.utils.help.HelpConstants;
|
||||
import org.broadinstitute.gatk.engine.GATKVCFUtils;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -183,7 +184,8 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
|||
headerLines.addAll(annotationEngine.getVCFAnnotationDescriptions());
|
||||
headerLines.addAll(genotypingEngine.getAppropriateVCFInfoHeaders());
|
||||
// add the pool values for each genotype
|
||||
VCFStandardHeaderLines.addStandardInfoLines(headerLines, true, VCFConstants.MLE_ALLELE_COUNT_KEY, VCFConstants.MLE_ALLELE_FREQUENCY_KEY);
|
||||
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_COUNT_KEY));
|
||||
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));
|
||||
if ( dbsnp != null && dbsnp.dbsnp.isBound() )
|
||||
VCFStandardHeaderLines.addStandardInfoLines(headerLines, true, VCFConstants.DBSNP_KEY);
|
||||
|
||||
|
|
@ -265,10 +267,10 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
|||
private VariantContext addGenotypingAnnotations(final Map<String, Object> originalAttributes, final VariantContext newVC) {
|
||||
// we want to carry forward the attributes from the original VC but make sure to add the MLE-based annotations
|
||||
final Map<String, Object> attrs = new HashMap<>(originalAttributes);
|
||||
attrs.put(VCFConstants.MLE_ALLELE_COUNT_KEY, newVC.getAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY));
|
||||
attrs.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, newVC.getAttribute(VCFConstants.MLE_ALLELE_FREQUENCY_KEY));
|
||||
if (newVC.hasAttribute(GenotypingEngine.NUMBER_OF_DISCOVERED_ALLELES_KEY))
|
||||
attrs.put(GenotypingEngine.NUMBER_OF_DISCOVERED_ALLELES_KEY, newVC.getAttribute(GenotypingEngine.NUMBER_OF_DISCOVERED_ALLELES_KEY));
|
||||
attrs.put(GATKVCFConstants.MLE_ALLELE_COUNT_KEY, newVC.getAttribute(GATKVCFConstants.MLE_ALLELE_COUNT_KEY));
|
||||
attrs.put(GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY, newVC.getAttribute(GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));
|
||||
if (newVC.hasAttribute(GATKVCFConstants.NUMBER_OF_DISCOVERED_ALLELES_KEY))
|
||||
attrs.put(GATKVCFConstants.NUMBER_OF_DISCOVERED_ALLELES_KEY, newVC.getAttribute(GATKVCFConstants.NUMBER_OF_DISCOVERED_ALLELES_KEY));
|
||||
|
||||
return new VariantContextBuilder(newVC).attributes(attrs).make();
|
||||
}
|
||||
|
|
@ -305,8 +307,8 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
|||
attrs.remove("SB");
|
||||
|
||||
// update PGT for hom vars
|
||||
if ( oldGT.isHomVar() && oldGT.hasExtendedAttribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_GT_KEY) ) {
|
||||
attrs.put(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_GT_KEY, "1|1");
|
||||
if ( oldGT.isHomVar() && oldGT.hasExtendedAttribute(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY) ) {
|
||||
attrs.put(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY, "1|1");
|
||||
}
|
||||
|
||||
// create AD if it's not there
|
||||
|
|
|
|||
|
|
@ -54,6 +54,7 @@ package org.broadinstitute.gatk.tools.walkers.variantutils;
|
|||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
import org.broadinstitute.gatk.utils.Utils;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
import htsjdk.variant.variantcontext.*;
|
||||
import htsjdk.variant.vcf.VCFConstants;
|
||||
|
|
@ -62,8 +63,6 @@ import java.util.*;
|
|||
|
||||
public class PosteriorLikelihoodsUtils {
|
||||
|
||||
private static final String PHRED_SCALED_POSTERIORS_KEY = "PP";
|
||||
|
||||
public static VariantContext calculatePosteriorGLs(final VariantContext vc1,
|
||||
final Collection<VariantContext> resources,
|
||||
final int numRefSamplesFromMissingResources,
|
||||
|
|
@ -109,12 +108,12 @@ public class PosteriorLikelihoodsUtils {
|
|||
//parse the likelihoods for each sample's genotype
|
||||
final List<double[]> likelihoods = new ArrayList<>(vc1.getNSamples());
|
||||
for ( final Genotype genotype : vc1.getGenotypes() ) {
|
||||
if (!genotype.hasExtendedAttribute(PHRED_SCALED_POSTERIORS_KEY)){
|
||||
if (!genotype.hasExtendedAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY)){
|
||||
likelihoods.add(genotype.hasLikelihoods() ? genotype.getLikelihoods().getAsVector() : null );
|
||||
|
||||
}
|
||||
else {
|
||||
Object PPfromVCF = genotype.getExtendedAttribute(PHRED_SCALED_POSTERIORS_KEY);
|
||||
Object PPfromVCF = genotype.getExtendedAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY);
|
||||
//parse the PPs into a vector of probabilities
|
||||
if (PPfromVCF instanceof String) {
|
||||
final String PPstring = (String)PPfromVCF;
|
||||
|
|
@ -153,7 +152,7 @@ public class PosteriorLikelihoodsUtils {
|
|||
if ( posteriors.get(genoIdx) != null ) {
|
||||
GATKVariantContextUtils.updateGenotypeAfterSubsetting(vc1.getAlleles(), builder,
|
||||
GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, posteriors.get(genoIdx), vc1.getAlleles());
|
||||
builder.attribute(PHRED_SCALED_POSTERIORS_KEY,
|
||||
builder.attribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY,
|
||||
Utils.listFromPrimitives(GenotypeLikelihoods.fromLog10Likelihoods(posteriors.get(genoIdx)).getAsPLs()));
|
||||
}
|
||||
newContext.add(builder.make());
|
||||
|
|
@ -162,7 +161,7 @@ public class PosteriorLikelihoodsUtils {
|
|||
final List<Integer> priors = Utils.listFromPrimitives(
|
||||
GenotypeLikelihoods.fromLog10Likelihoods(getDirichletPrior(alleleCounts, vc1.getMaxPloidy(2),useFlatPriors)).getAsPLs());
|
||||
|
||||
final VariantContextBuilder builder = new VariantContextBuilder(vc1).genotypes(newContext).attribute("PG", priors);
|
||||
final VariantContextBuilder builder = new VariantContextBuilder(vc1).genotypes(newContext).attribute(GATKVCFConstants.GENOTYPE_PRIOR_KEY, priors);
|
||||
// add in the AC, AF, and AN attributes
|
||||
VariantContextUtils.calculateChromosomeCounts(builder, true);
|
||||
return builder.make();
|
||||
|
|
@ -266,8 +265,8 @@ public class PosteriorLikelihoodsUtils {
|
|||
private static void addAlleleCounts(final Map<Allele,Integer> counts, final VariantContext context, final boolean useAC) {
|
||||
final int[] ac;
|
||||
//use MLEAC value...
|
||||
if ( context.hasAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY) && ! useAC ) {
|
||||
ac = getAlleleCounts(VCFConstants.MLE_ALLELE_COUNT_KEY, context);
|
||||
if ( context.hasAttribute(GATKVCFConstants.MLE_ALLELE_COUNT_KEY) && ! useAC ) {
|
||||
ac = getAlleleCounts(GATKVCFConstants.MLE_ALLELE_COUNT_KEY, context);
|
||||
}
|
||||
//...unless specified by the user in useAC or unless MLEAC is absent
|
||||
else if ( context.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) {
|
||||
|
|
@ -346,7 +345,7 @@ public class PosteriorLikelihoodsUtils {
|
|||
}
|
||||
if ( mleList == null )
|
||||
throw new IllegalArgumentException(String.format("VCF does not have properly formatted "+
|
||||
VCFConstants.MLE_ALLELE_COUNT_KEY+" or "+VCFConstants.ALLELE_COUNT_KEY));
|
||||
GATKVCFConstants.MLE_ALLELE_COUNT_KEY+" or "+VCFConstants.ALLELE_COUNT_KEY));
|
||||
|
||||
final int[] mle = new int[mleList.size()];
|
||||
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@
|
|||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.tools.walkers.variantutils;
|
||||
|
||||
import htsjdk.variant.variantcontext.*;
|
||||
|
|
@ -58,6 +59,7 @@ import org.broadinstitute.gatk.utils.MathUtils;
|
|||
import org.broadinstitute.gatk.utils.Utils;
|
||||
import org.broadinstitute.gatk.utils.collections.Pair;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -123,7 +125,7 @@ public class ReferenceConfidenceVariantContextMerger {
|
|||
}
|
||||
|
||||
// Add <NON_REF> to the end if at all required in the output.
|
||||
if (!removeNonRefSymbolicAllele) finalAlleleSet.add(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
if (!removeNonRefSymbolicAllele) finalAlleleSet.add(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||
|
||||
final List<Allele> allelesList = new ArrayList<>(finalAlleleSet);
|
||||
|
||||
|
|
@ -138,7 +140,7 @@ public class ReferenceConfidenceVariantContextMerger {
|
|||
depth += vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0);
|
||||
} else { // handle the gVCF case from the HaplotypeCaller
|
||||
for( final Genotype gt : vc.getGenotypes() ) {
|
||||
depth += (gt.hasExtendedAttribute("MIN_DP") ? Integer.parseInt((String)gt.getAnyAttribute("MIN_DP")) : (gt.hasDP() ? gt.getDP() : 0));
|
||||
depth += (gt.hasExtendedAttribute(GATKVCFConstants.MIN_DP_FORMAT_KEY) ? Integer.parseInt((String)gt.getAnyAttribute(GATKVCFConstants.MIN_DP_FORMAT_KEY)) : (gt.hasDP() ? gt.getDP() : 0));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -198,8 +200,8 @@ public class ReferenceConfidenceVariantContextMerger {
|
|||
attributes.remove(VCFConstants.ALLELE_COUNT_KEY);
|
||||
attributes.remove(VCFConstants.ALLELE_FREQUENCY_KEY);
|
||||
attributes.remove(VCFConstants.ALLELE_NUMBER_KEY);
|
||||
attributes.remove(VCFConstants.MLE_ALLELE_COUNT_KEY);
|
||||
attributes.remove(VCFConstants.MLE_ALLELE_FREQUENCY_KEY);
|
||||
attributes.remove(GATKVCFConstants.MLE_ALLELE_COUNT_KEY);
|
||||
attributes.remove(GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY);
|
||||
attributes.remove(VCFConstants.END_KEY);
|
||||
}
|
||||
|
||||
|
|
@ -262,7 +264,7 @@ public class ReferenceConfidenceVariantContextMerger {
|
|||
} else if (a.isSymbolic()) {
|
||||
result.add(a);
|
||||
// we always skip <NON_REF> when adding to finalAlleles; this is done outside, if it applies.
|
||||
if (!a.equals(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE))
|
||||
if (!a.equals(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE))
|
||||
finalAlleles.add(a);
|
||||
} else if (a.isCalled()) {
|
||||
final Allele newAllele;
|
||||
|
|
@ -293,7 +295,7 @@ public class ReferenceConfidenceVariantContextMerger {
|
|||
|
||||
final List<Allele> result = new ArrayList<>(alleles.size());
|
||||
for ( final Allele allele : alleles )
|
||||
result.add(allele.equals(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE) ? allele : Allele.NO_CALL);
|
||||
result.add(allele.equals(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE) ? allele : Allele.NO_CALL);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -374,10 +376,10 @@ public class ReferenceConfidenceVariantContextMerger {
|
|||
if ( remappedAlleles == null || remappedAlleles.size() == 0 ) throw new IllegalArgumentException("The list of input alleles must not be null or empty");
|
||||
if ( targetAlleles == null || targetAlleles.size() == 0 ) throw new IllegalArgumentException("The list of target alleles must not be null or empty");
|
||||
|
||||
if ( !remappedAlleles.contains(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE) )
|
||||
throw new UserException("The list of input alleles must contain " + GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE + " as an allele but that is not the case at position " + position + "; please use the Haplotype Caller with gVCF output to generate appropriate records");
|
||||
if ( !remappedAlleles.contains(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE) )
|
||||
throw new UserException("The list of input alleles must contain " + GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE + " as an allele but that is not the case at position " + position + "; please use the Haplotype Caller with gVCF output to generate appropriate records");
|
||||
|
||||
final int indexOfNonRef = remappedAlleles.indexOf(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
final int indexOfNonRef = remappedAlleles.indexOf(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||
|
||||
//if the refs don't match then let the non-ref allele be the most likely of the alts
|
||||
//TODO: eventually it would be nice to be able to trim alleles for spanning events to see if they really do have the same ref
|
||||
|
|
@ -399,7 +401,7 @@ public class ReferenceConfidenceVariantContextMerger {
|
|||
indexMapping[0] = 0;
|
||||
|
||||
// create the index mapping, using the <ALT> allele whenever such a mapping doesn't exist
|
||||
final int targetNonRef = targetAlleles.indexOf(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
final int targetNonRef = targetAlleles.indexOf(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||
final boolean targetHasNonRef = targetNonRef != -1;
|
||||
final int lastConcreteAlt = targetHasNonRef ? targetAlleles.size()-2 : targetAlleles.size()-1;
|
||||
for ( int i = 1; i <= lastConcreteAlt; i++ ) {
|
||||
|
|
|
|||
|
|
@ -57,6 +57,8 @@ import htsjdk.variant.variantcontext.VariantContext;
|
|||
import htsjdk.variant.variantcontext.VariantContextBuilder;
|
||||
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
|
||||
import htsjdk.variant.vcf.*;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
|
@ -71,10 +73,6 @@ import java.util.List;
|
|||
* Time: 2:51 PM
|
||||
*/
|
||||
public class GVCFWriter implements VariantContextWriter {
|
||||
//
|
||||
// static VCF field names
|
||||
//
|
||||
protected final static String MIN_DP_FORMAT_FIELD = "MIN_DP";
|
||||
|
||||
//
|
||||
// Final fields initialized in constructor
|
||||
|
|
@ -151,7 +149,7 @@ public class GVCFWriter implements VariantContextWriter {
|
|||
public void writeHeader(VCFHeader header) {
|
||||
if ( header == null ) throw new IllegalArgumentException("header cannot be null");
|
||||
header.addMetaDataLine(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY));
|
||||
header.addMetaDataLine(new VCFFormatHeaderLine(MIN_DP_FORMAT_FIELD, 1, VCFHeaderLineType.Integer, "Minimum DP observed within the GVCF block"));
|
||||
header.addMetaDataLine(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.MIN_DP_FORMAT_KEY));
|
||||
|
||||
for ( final HomRefBlock partition : GQPartitions ) {
|
||||
header.addMetaDataLine(partition.toVCFHeaderLine());
|
||||
|
|
@ -251,7 +249,7 @@ public class GVCFWriter implements VariantContextWriter {
|
|||
final int gq = genotypeQualityFromPLs(minPLs);
|
||||
gb.GQ(gq);
|
||||
gb.DP(block.getMedianDP());
|
||||
gb.attribute(MIN_DP_FORMAT_FIELD, block.getMinDP());
|
||||
gb.attribute(GATKVCFConstants.MIN_DP_FORMAT_KEY, block.getMinDP());
|
||||
|
||||
// This annotation is no longer standard
|
||||
//gb.attribute(MIN_GQ_FORMAT_FIELD, block.getMinGQ());
|
||||
|
|
@ -330,7 +328,7 @@ public class GVCFWriter implements VariantContextWriter {
|
|||
}
|
||||
|
||||
final Genotype g = vc.getGenotype(0);
|
||||
if ( g.isHomRef() && vc.hasAlternateAllele(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE) && vc.isBiallelic() ) {
|
||||
if ( g.isHomRef() && vc.hasAlternateAllele(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE) && vc.isBiallelic() ) {
|
||||
// create bands
|
||||
final VariantContext maybeCompletedBand = addHomRefSite(vc, g);
|
||||
if ( maybeCompletedBand != null ) underlyingWriter.add(maybeCompletedBand);
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@
|
|||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import htsjdk.variant.variantcontext.*;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
|
|
@ -82,8 +83,8 @@ public class GenotypeSummariesUnitTest {
|
|||
final GenotypeSummaries GS = new GenotypeSummaries();
|
||||
final Map<String,Object> resultMap = GS.annotate(null, null, null, null, testVC, null);
|
||||
|
||||
Assert.assertEquals(1, resultMap.get(GenotypeSummaries.NCC)); // 1 no-called called sample
|
||||
Assert.assertEquals(30.0, Double.parseDouble((String)resultMap.get(GenotypeSummaries.GQ_MEAN)), 1E-4); // mean GQ is 30
|
||||
Assert.assertFalse(resultMap.containsKey(GenotypeSummaries.GQ_STDDEV)); // no stddev with only one data point
|
||||
Assert.assertEquals(1, resultMap.get(GATKVCFConstants.NOCALL_CHROM_KEY)); // 1 no-called called sample
|
||||
Assert.assertEquals(30.0, Double.parseDouble((String)resultMap.get(GATKVCFConstants.GQ_MEAN_KEY)), 1E-4); // mean GQ is 30
|
||||
Assert.assertFalse(resultMap.containsKey(GATKVCFConstants.GQ_STDEV_KEY)); // no stddev with only one data point
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -71,11 +71,11 @@ public class DiagnoseTargetsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testSingleSample() {
|
||||
DTTest("testSingleSample ", "-I " + singleSample + " -max 75", "6ca3d3917a7b65eaa877aa3658d80912");
|
||||
DTTest("testSingleSample ", "-I " + singleSample + " -max 75", "5cad1b8e3bf5582842bbeadbc173e8aa");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMultiSample() {
|
||||
DTTest("testMultiSample ", "-I " + multiSample, "f50c6b9bef9f63f0a8b32ae9a9bdd51a");
|
||||
DTTest("testMultiSample ", "-I " + multiSample, "c2a11ad34104fd5e4e65bdf049abe5e7");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ import org.broadinstitute.gatk.utils.MathUtils;
|
|||
import org.broadinstitute.gatk.utils.Utils;
|
||||
import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.testng.Assert;
|
||||
import org.testng.SkipException;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
|
@ -451,9 +451,9 @@ public class ReadLikelihoodsUnitTest
|
|||
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
|
||||
final double[][][] originalLikelihoods = fillWithRandomLikelihoods(samples,alleles,original);
|
||||
final ReadLikelihoods<Allele> result = original.clone();
|
||||
result.addNonReferenceAllele(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
result.addNonReferenceAllele(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||
Assert.assertEquals(result.alleleCount(),original.alleleCount() + 1);
|
||||
Assert.assertEquals(result.alleleIndex(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE),result.alleleCount() - 1);
|
||||
Assert.assertEquals(result.alleleIndex(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE),result.alleleCount() - 1);
|
||||
final double[][][] newLikelihoods = new double[originalLikelihoods.length][][];
|
||||
for (int s = 0; s < samples.length; s++) {
|
||||
newLikelihoods[s] = Arrays.copyOf(originalLikelihoods[s],originalLikelihoods[s].length + 1);
|
||||
|
|
|
|||
|
|
@ -71,7 +71,7 @@ import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
|
|||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.gatk.utils.smithwaterman.Parameters;
|
||||
import org.broadinstitute.gatk.utils.smithwaterman.SWPairwiseAlignment;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
|
@ -230,7 +230,7 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest {
|
|||
}
|
||||
Allele altAllele = null;
|
||||
for (final Allele allele : updatedVc.getAlleles())
|
||||
if (allele.isSymbolic() && allele.getBaseString().equals(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE_NAME))
|
||||
if (allele.isSymbolic() && allele.getBaseString().equals(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE_NAME))
|
||||
altAllele = allele;
|
||||
Assert.assertNotNull(altAllele);
|
||||
}
|
||||
|
|
@ -521,8 +521,8 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest {
|
|||
int counter = 0;
|
||||
for ( final VariantContext call : actualPhasedCalls ) {
|
||||
for ( final Genotype g : call.getGenotypes() ) {
|
||||
if ( g.hasExtendedAttribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_ID_KEY) ) {
|
||||
uniqueGroups.add(g.getExtendedAttribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_ID_KEY).toString());
|
||||
if ( g.hasExtendedAttribute(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_ID_KEY) ) {
|
||||
uniqueGroups.add(g.getExtendedAttribute(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_ID_KEY).toString());
|
||||
counter++;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -68,6 +68,7 @@ import org.broadinstitute.gatk.utils.pileup.ReadBackedPileupImpl;
|
|||
import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
|
||||
import org.testng.Assert;
|
||||
|
|
@ -392,7 +393,7 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
|
|||
refModel.getStart() - call.getStart() + 1), refModel.getReference().getBaseString(), "" + data.getRefHap()); // the reference must be the same.
|
||||
Assert.assertTrue(refModel.getGenotype(0).getGQ() <= 0); // No confidence in the reference hom-ref call across the deletion
|
||||
Assert.assertEquals(refModel.getAlleles().size(),2); // the reference and the lonelly <NON_REF>
|
||||
Assert.assertEquals(refModel.getAlleles().get(1),GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
Assert.assertEquals(refModel.getAlleles().get(1), GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||
} else {
|
||||
Assert.assertEquals(refModel, call, "Should have found call " + call + " but found " + refModel + " instead");
|
||||
}
|
||||
|
|
@ -403,7 +404,7 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
|
|||
Assert.assertEquals(refModel.getEnd(), loc.getStart() + i);
|
||||
Assert.assertFalse(refModel.hasLog10PError());
|
||||
Assert.assertEquals(refModel.getAlternateAlleles().size(), 1);
|
||||
Assert.assertEquals(refModel.getAlternateAllele(0), GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
Assert.assertEquals(refModel.getAlternateAllele(0), GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||
Assert.assertTrue(refModel.hasGenotype(sample));
|
||||
|
||||
final Genotype g = refModel.getGenotype(sample);
|
||||
|
|
|
|||
|
|
@ -71,7 +71,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -V " + privateTestDir + "testUpdatePGT.vcf", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("27bc40f7cc46bdc347284d7522b2aa6c"));
|
||||
Arrays.asList("6483df1dfa3a5290ba2dc10cc8e15370"));
|
||||
executeTest("testUpdatePGT", spec);
|
||||
}
|
||||
|
||||
|
|
@ -83,7 +83,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||
" -L 20:10,000,000-20,000,000", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("bb7775a555ee9859e18a28cbc044a160"));
|
||||
Arrays.asList("d873327b474fa341cee7823a35efda89"));
|
||||
executeTest("combineSingleSamplePipelineGVCF", spec);
|
||||
}
|
||||
|
||||
|
|
@ -95,7 +95,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "tetraploid-gvcf-3.vcf" +
|
||||
" -L " + privateTestDir + "tetraploid-gvcfs.intervals", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("a2e482cddbc987b0ba004e13044f6e81"));
|
||||
Arrays.asList("f5b3c4b4b45f7d3bc4a38ff5ac7076f0"));
|
||||
executeTest("combineSingleSamplePipelineGVCF", spec);
|
||||
}
|
||||
|
||||
|
|
@ -107,7 +107,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "diploid-gvcf-3.vcf" +
|
||||
" -L " + privateTestDir + "tetraploid-gvcfs.intervals", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("0ad7d784a15ad7f8b386ec7ca34032af"));
|
||||
Arrays.asList("1f4632023ac646d7d04f65d797109f91"));
|
||||
executeTest("combineSingleSamplePipelineGVCF", spec);
|
||||
}
|
||||
|
||||
|
|
@ -119,7 +119,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||
" --includeNonVariantSites -L 20:10,030,000-10,033,000 -L 20:10,386,000-10,386,500", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("fdd06679c8a14ef2010d075cbae76519"));
|
||||
Arrays.asList("70376c02babc75c15a1e9a6be47e34fa"));
|
||||
executeTest("combineSingleSamplePipelineGVCF_includeNonVariants", spec);
|
||||
}
|
||||
|
||||
|
|
@ -132,7 +132,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||
" -L 20:10,000,000-20,000,000", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("9b7f2ba1bde2e0a0eb3ebc0afb6bc513"));
|
||||
Arrays.asList("8c814998059fda80cf5a18242db13c19"));
|
||||
executeTest("combineSingleSamplePipelineGVCFHierarchical", spec);
|
||||
}
|
||||
|
||||
|
|
@ -144,7 +144,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||
" -L 20:10,000,000-11,000,000 --dbsnp " + b37dbSNP132, b37KGReference),
|
||||
1,
|
||||
Arrays.asList("8201cee7120dfdb3fdeace0ec511c7b1"));
|
||||
Arrays.asList("114a75003083cbe1a9966cc489d441af"));
|
||||
executeTest("combineSingleSamplePipelineGVCF_addDbsnp", spec);
|
||||
}
|
||||
|
||||
|
|
@ -154,7 +154,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
"-T GenotypeGVCFs --no_cmdline_in_header -L 1:69485-69791 -o %s -R " + b37KGReference +
|
||||
" -V " + privateTestDir + "gvcfExample1.vcf",
|
||||
1,
|
||||
Arrays.asList("b4bb1d21c7a3d793a98b0857c7c5d52b"));
|
||||
Arrays.asList("364043ee77d4c6dfe1403a90b4938a65"));
|
||||
executeTest("testJustOneSample", spec);
|
||||
}
|
||||
|
||||
|
|
@ -165,7 +165,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V " + privateTestDir + "gvcfExample1.vcf" +
|
||||
" -V " + privateTestDir + "gvcfExample2.vcf",
|
||||
1,
|
||||
Arrays.asList("ec63a629cc707554d3dd2ba7254b3b8d"));
|
||||
Arrays.asList("3fc58414196213bc3a85237b055b7883"));
|
||||
executeTest("testSamplesWithDifferentLs", spec);
|
||||
}
|
||||
|
||||
|
|
@ -176,12 +176,12 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
"-T GenotypeGVCFs --no_cmdline_in_header -L 1:1115550-1115551 -o %s -R " + hg19Reference +
|
||||
" --variant " + privateTestDir + "combined_genotype_gvcf_exception.vcf",
|
||||
1,
|
||||
Arrays.asList("9626a7108d616d63a2a8069b306c1fe0"));
|
||||
Arrays.asList("08e4b839dede1b91ce6bd89c66ff063c"));
|
||||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||
"-T GenotypeGVCFs --no_cmdline_in_header -L 1:1115550-1115551 -o %s -R " + hg19Reference +
|
||||
" --variant " + privateTestDir + "combined_genotype_gvcf_exception.nocall.vcf",
|
||||
1,
|
||||
Arrays.asList("9626a7108d616d63a2a8069b306c1fe0"));
|
||||
Arrays.asList("08e4b839dede1b91ce6bd89c66ff063c"));
|
||||
executeTest("testNoPLsException.1", spec1);
|
||||
executeTest("testNoPLsException.2", spec2);
|
||||
}
|
||||
|
|
@ -191,7 +191,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseBPResolutionString("-nda"),
|
||||
1,
|
||||
Arrays.asList("d50e5035488f63c574dcb8485ff61fcb"));
|
||||
Arrays.asList("6bfc0d58eed9fc98642f36a09b1a235d"));
|
||||
executeTest("testNDA", spec);
|
||||
}
|
||||
|
||||
|
|
@ -200,7 +200,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseBPResolutionString("-maxAltAlleles 1"),
|
||||
1,
|
||||
Arrays.asList("8fa78191298b4d8c9b40fba2c705ad56"));
|
||||
Arrays.asList("1e238c736e3f43e3693327a89455faaa"));
|
||||
executeTest("testMaxAltAlleles", spec);
|
||||
}
|
||||
|
||||
|
|
@ -209,7 +209,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseBPResolutionString("-stand_call_conf 300 -stand_emit_conf 100"),
|
||||
1,
|
||||
Arrays.asList("bd58c026e9c8df4d4166f22cd0f0ce65"));
|
||||
Arrays.asList("9c192402a005216649ff44a36cc7c45c"));
|
||||
executeTest("testStandardConf", spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -62,6 +62,7 @@ import org.broadinstitute.gatk.utils.BaseTest;
|
|||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
import htsjdk.variant.variantcontext.*;
|
||||
import htsjdk.variant.vcf.VCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeSuite;
|
||||
import org.testng.annotations.Test;
|
||||
|
|
@ -74,7 +75,6 @@ import java.util.List;
|
|||
public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
||||
|
||||
Allele Aref, T, C, G, Cref, ATC, ATCATC;
|
||||
private final String PHRED_SCALED_POSTERIORS_KEY = "PP";
|
||||
|
||||
@BeforeSuite
|
||||
public void setup() {
|
||||
|
|
@ -151,16 +151,16 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
makeG("s10",Aref,T,20,0,10),
|
||||
makeG("s11",T,T,60,40,0),
|
||||
makeG("s12",Aref,Aref,0,30,90));
|
||||
test1 = new VariantContextBuilder(test1).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,12).make();
|
||||
test1 = new VariantContextBuilder(test1).attribute(GATKVCFConstants.MLE_ALLELE_COUNT_KEY,12).make();
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test1, new ArrayList<VariantContext>(), 0, 0.001, true, false, false);
|
||||
Genotype test1exp1 = makeGwithPLs("s1",Aref,T,new double[]{-2.26110257, -0.02700903, -1.26110257});
|
||||
Assert.assertTrue(test1exp1.hasPL());
|
||||
Genotype test1exp2 = makeGwithPLs("s2",T,T,new double[]{-6.000075e+00, -3.765981e+00, -7.488009e-05});
|
||||
Genotype test1exp3 = makeGwithPLs("s3",Aref,Aref,new double[]{-0.0007438855, -2.7666503408, -9.0007438855});
|
||||
Assert.assertEquals("java.util.ArrayList",test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY).getClass().getCanonicalName());
|
||||
Assert.assertEquals(arraysEq(test1exp1.getPL(), _mleparse((List<Integer>)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
Assert.assertEquals(arraysEq(test1exp2.getPL(),_mleparse((List<Integer>)test1result.getGenotype(1).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
Assert.assertEquals(arraysEq(test1exp3.getPL(),_mleparse((List<Integer>)test1result.getGenotype(2).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
Assert.assertEquals("java.util.ArrayList",test1result.getGenotype(0).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY).getClass().getCanonicalName());
|
||||
Assert.assertEquals(arraysEq(test1exp1.getPL(), _mleparse((List<Integer>)test1result.getGenotype(0).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
Assert.assertEquals(arraysEq(test1exp2.getPL(),_mleparse((List<Integer>)test1result.getGenotype(1).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
Assert.assertEquals(arraysEq(test1exp3.getPL(),_mleparse((List<Integer>)test1result.getGenotype(2).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
|
||||
// AA AB BB AC BC CC
|
||||
// AA AC CC AT CT TT
|
||||
|
|
@ -177,16 +177,16 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
makeG("s10",Aref,C,40,0,10,30,40,80),
|
||||
makeG("s11",Aref,Aref,0,5,8,15,20,40),
|
||||
makeG("s12",C,T,80,40,12,20,0,10));
|
||||
test2 = new VariantContextBuilder(test2).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,new ArrayList<Integer>(Arrays.asList(6,6))).make();
|
||||
test2 = new VariantContextBuilder(test2).attribute(GATKVCFConstants.MLE_ALLELE_COUNT_KEY,new ArrayList<Integer>(Arrays.asList(6,6))).make();
|
||||
VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test2,new ArrayList<VariantContext>(),5,0.001,true,false, false);
|
||||
Genotype test2exp1 = makeGwithPLs("s1",Aref,T,new double[]{-2.823957, -1.000000, -6.686344, 0.000000, -1.952251, -9.686344});
|
||||
Genotype test2exp2 = makeGwithPLs("s2",Aref,C,new double[]{-3.823957, 0.000000, -1.686344, -3.000000, -4.452251, -8.686344});
|
||||
Genotype test2exp3 = makeGwithPLs("s3",Aref,Aref,new double[] {0.000000, -0.676043, -1.662387, -1.676043, -2.628294, -4.862387});
|
||||
Genotype test2exp4 = makeGwithPLs("s4",C,T,new double[]{-7.371706, -3.547749, -1.434094, -1.547749, 0.000000, -1.234094});
|
||||
Assert.assertEquals(arraysEq(test2exp1.getPL(),(int[]) _mleparse((List<Integer>)test2result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
Assert.assertEquals(arraysEq(test2exp2.getPL(),(int[]) _mleparse((List<Integer>)test2result.getGenotype(1).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
Assert.assertEquals(arraysEq(test2exp3.getPL(),(int[]) _mleparse((List<Integer>)test2result.getGenotype(2).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
Assert.assertEquals(arraysEq(test2exp4.getPL(),(int[]) _mleparse((List<Integer>)test2result.getGenotype(3).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
Assert.assertEquals(arraysEq(test2exp1.getPL(),(int[]) _mleparse((List<Integer>)test2result.getGenotype(0).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
Assert.assertEquals(arraysEq(test2exp2.getPL(),(int[]) _mleparse((List<Integer>)test2result.getGenotype(1).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
Assert.assertEquals(arraysEq(test2exp3.getPL(),(int[]) _mleparse((List<Integer>)test2result.getGenotype(2).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
Assert.assertEquals(arraysEq(test2exp4.getPL(),(int[]) _mleparse((List<Integer>)test2result.getGenotype(3).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -195,7 +195,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
makeG("s2",Aref,T,18,0,24),
|
||||
makeG("s3",Aref,T,22,0,12));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(3);
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,2).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(GATKVCFConstants.MLE_ALLELE_COUNT_KEY,2).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("3",Arrays.asList(Aref,T))).attribute(VCFConstants.ALLELE_COUNT_KEY,4).attribute(VCFConstants.ALLELE_NUMBER_KEY,22).make());
|
||||
supplTest1.add(makeVC("4",Arrays.asList(Aref,T),
|
||||
makeG("s_1",T,T),
|
||||
|
|
@ -205,25 +205,25 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
Genotype test1exp1 = makeGwithPLs("t1",T,T,new double[]{-3.370985, -1.415172, -0.01721766});
|
||||
Genotype test1exp2 = makeGwithPLs("t2",Aref,T,new double[]{-1.763792, -0.007978791, -3.010024});
|
||||
Genotype test1exp3 = makeGwithPLs("t3",Aref,T,new double[]{-2.165587, -0.009773643, -1.811819});
|
||||
Assert.assertEquals(arraysEq(test1exp1.getPL(),_mleparse((List<Integer>) test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
Assert.assertEquals(arraysEq(test1exp2.getPL(),_mleparse((List<Integer>) test1result.getGenotype(1).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
Assert.assertEquals(arraysEq(test1exp3.getPL(),_mleparse((List<Integer>) test1result.getGenotype(2).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
Assert.assertEquals(arraysEq(test1exp1.getPL(),_mleparse((List<Integer>) test1result.getGenotype(0).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
Assert.assertEquals(arraysEq(test1exp2.getPL(),_mleparse((List<Integer>) test1result.getGenotype(1).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
Assert.assertEquals(arraysEq(test1exp3.getPL(),_mleparse((List<Integer>) test1result.getGenotype(2).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
|
||||
VariantContext testNonOverlapping = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,3,1,0));
|
||||
List<VariantContext> other = Arrays.asList(makeVC("2",Arrays.asList(Aref,C),makeG("s2",C,C,10,2,0)));
|
||||
VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testNonOverlapping,other,0,0.001,true,false,false);
|
||||
Genotype test2exp1 = makeGwithPLs("SGV",T,T,new double[]{-4.078345, -3.276502, -0.0002661066});
|
||||
Assert.assertEquals(arraysEq(test2exp1.getPL(),_mleparse((List<Integer>) test2result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
Assert.assertEquals(arraysEq(test2exp1.getPL(),_mleparse((List<Integer>) test2result.getGenotype(0).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
}
|
||||
|
||||
/**
 * Posterior check for a single sample ("s1") whose input PLs are 40,1,0, combined with one
 * supplemental site that carries an MLE allele count of 500 and an allele number of 1000.
 * The test passes when the parsed posterior at index 2 is larger than the one at index 1.
 *
 * NOTE(review): this span comes from a rendered diff, so the statements that differ only in
 * using a VCFConstants/unqualified key versus a GATKVCFConstants key look like the before and
 * after forms of one statement rather than two separate statements - confirm against the file.
 */
@Test
|
||||
private void testCalculatePosteriorHOM_VARtoHET() {
|
||||
// Sample under test: genotype built with alleles T/T and PLs 40,1,0.
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,1,0));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
// Supplemental site: MLE allele count 500, allele number 1000.
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(GATKVCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||
|
||||
// Posterior attribute of the first genotype, converted to int[] by _mleparse.
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY));
|
||||
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY));
|
||||
// Presumably phred-scaled (smaller = more likely), i.e. het should now beat hom-var - TODO confirm.
Assert.assertTrue(GP[2] > GP[1]);
|
||||
}
|
||||
|
||||
|
|
@ -231,10 +231,10 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
// Posterior check for a single sample ("s1") whose input PLs are 40,0,1, combined with one
// supplemental site that carries an MLE allele count of 900 and an allele number of 1000.
// The test passes when the parsed posterior at index 2 is smaller than the one at index 1.
// NOTE(review): this span comes from a rendered diff; statements that differ only in the
// constants class used for the key look like before/after forms of one statement - confirm.
private void testCalculatePosteriorHETtoHOM_VAR() {
|
||||
// Sample under test: genotype built with alleles T/T and PLs 40,0,1.
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,0,1));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
// Supplemental site: MLE allele count 900, allele number 1000.
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,900).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(GATKVCFConstants.MLE_ALLELE_COUNT_KEY,900).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||
|
||||
// Posterior attribute of the first genotype, converted to int[] by _mleparse.
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY));
|
||||
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY));
|
||||
// Presumably phred-scaled (smaller = more likely), i.e. hom-var should now beat het - TODO confirm.
Assert.assertTrue(GP[2] < GP[1]);
|
||||
}
|
||||
|
||||
|
|
@ -242,10 +242,10 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
// Posterior check for a single sample ("s1") whose input PLs are 0,1,40, combined with one
// supplemental site that carries an MLE allele count of 500 and an allele number of 1000.
// The test passes when the parsed posterior at index 0 is larger than the one at index 1.
// NOTE(review): this span comes from a rendered diff; statements that differ only in the
// constants class used for the key look like before/after forms of one statement - confirm.
private void testCalculatePosteriorHOM_REFtoHET() {
|
||||
// Sample under test: PLs 0,1,40.
// NOTE(review): the genotype is still built with alleles T/T although the PLs favour index 0;
// presumably only the PLs matter to the calculation - confirm.
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,0,1,40));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
// Supplemental site: MLE allele count 500, allele number 1000.
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(GATKVCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||
|
||||
// Posterior attribute of the first genotype, converted to int[] by _mleparse.
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY));
|
||||
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY));
|
||||
// Presumably phred-scaled (smaller = more likely), i.e. het should now beat hom-ref - TODO confirm.
Assert.assertTrue(GP[0] > GP[1]);
|
||||
}
|
||||
|
||||
|
|
@ -253,10 +253,10 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
// Posterior check for a single sample ("s1") whose input PLs are 1,0,40, combined with one
// supplemental site that carries an MLE allele count of 100 and an allele number of 1000.
// The test passes when the parsed posterior at index 0 is smaller than the one at index 1.
// NOTE(review): this span comes from a rendered diff; statements that differ only in the
// constants class used for the key look like before/after forms of one statement - confirm.
private void testCalculatePosteriorHETtoHOM_REF() {
|
||||
// Sample under test: PLs 1,0,40 (genotype alleles are passed as T/T).
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,1,0,40));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
// Supplemental site: MLE allele count 100, allele number 1000.
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,100).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(GATKVCFConstants.MLE_ALLELE_COUNT_KEY,100).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||
|
||||
// Posterior attribute of the first genotype, converted to int[] by _mleparse.
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY));
|
||||
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY));
|
||||
// Presumably phred-scaled (smaller = more likely), i.e. hom-ref should now beat het - TODO confirm.
Assert.assertTrue(GP[0] < GP[1]);
|
||||
}
|
||||
|
||||
|
|
@ -266,7 +266,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
makeG("s2",Aref,T,18,0,24),
|
||||
makeG("s3",Aref,T,22,0,12));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,11).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(GATKVCFConstants.MLE_ALLELE_COUNT_KEY,11).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||
}
|
||||
|
||||
|
|
@ -287,7 +287,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
makeG("s2",Aref,T,18,0,24),
|
||||
makeG("s3",Aref,T,22,0,12));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(GATKVCFConstants.MLE_ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||
}
|
||||
|
||||
|
|
@ -307,7 +307,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
makeG("s2",Aref,T,18,0,24),
|
||||
makeG("s3",Aref,T,22,0,12));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(GATKVCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||
}
|
||||
|
||||
|
|
@ -317,11 +317,11 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
makeG("s2",Aref,ATC,18,0,24),
|
||||
makeG("s3",Aref,ATC,22,0,12));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(GATKVCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(inputIndel,supplTest1,0,0.001,true,false,false);
|
||||
|
||||
System.out.println(test1result);
|
||||
int[] GPs = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY));
|
||||
int[] GPs = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY));
|
||||
int[] PLs = test1result.getGenotype(0).getPL();
|
||||
Assert.assertEquals(PLs,GPs);
|
||||
}
|
||||
|
|
@ -332,12 +332,12 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
makeG("s2",Aref,T,18,0,24),
|
||||
makeG("s3",Aref,T,22,0,12));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,ATC,ATCATC))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,ATC,ATCATC))).attribute(GATKVCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(inputIndel,supplTest1,0,0.001,true,false,false);
|
||||
|
||||
|
||||
System.out.println(test1result);
|
||||
int[] GPs = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY));
|
||||
int[] GPs = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY));
|
||||
int[] PLs = test1result.getGenotype(0).getPL();
|
||||
Assert.assertEquals(PLs,GPs);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ import htsjdk.variant.variantcontext.*;
|
|||
import org.broadinstitute.gatk.utils.*;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeSuite;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
|
@ -143,7 +143,7 @@ public class VariantContextMergerUnitTest extends BaseTest {
|
|||
|
||||
// always add the reference and <ALT> alleles
|
||||
myAlleles.add(allAlleles.get(0));
|
||||
myAlleles.add(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
myAlleles.add(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||
// optionally add another alternate allele
|
||||
if ( allelesIndex > 0 )
|
||||
myAlleles.add(allAlleles.get(allelesIndex));
|
||||
|
|
@ -180,31 +180,31 @@ public class VariantContextMergerUnitTest extends BaseTest {
|
|||
noCalls.add(Allele.NO_CALL);
|
||||
noCalls.add(Allele.NO_CALL);
|
||||
|
||||
final List<Allele> A_ALT = Arrays.asList(Aref, GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
final List<Allele> A_ALT = Arrays.asList(Aref, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||
final Genotype gA_ALT = new GenotypeBuilder("A").PL(new int[]{0, 100, 1000}).alleles(noCalls).make();
|
||||
final VariantContext vcA_ALT = new VariantContextBuilder(VCbase).alleles(A_ALT).genotypes(gA_ALT).make();
|
||||
final Allele AAref = Allele.create("AA", true);
|
||||
final List<Allele> AA_ALT = Arrays.asList(AAref, GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
final List<Allele> AA_ALT = Arrays.asList(AAref, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||
final Genotype gAA_ALT = new GenotypeBuilder("AA").PL(new int[]{0, 80, 800}).alleles(noCalls).make();
|
||||
final VariantContext vcAA_ALT = new VariantContextBuilder(VCprevBase).alleles(AA_ALT).genotypes(gAA_ALT).make();
|
||||
final List<Allele> A_C = Arrays.asList(Aref, C);
|
||||
final Genotype gA_C = new GenotypeBuilder("A_C").PL(new int[]{30, 20, 10}).alleles(noCalls).make();
|
||||
final List<Allele> A_C_ALT = Arrays.asList(Aref, C, GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
final List<Allele> A_C_ALT = Arrays.asList(Aref, C, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||
final Genotype gA_C_ALT = new GenotypeBuilder("A_C").PL(standardPLs).alleles(noCalls).make();
|
||||
final VariantContext vcA_C_ALT = new VariantContextBuilder(VCbase).alleles(A_C_ALT).genotypes(gA_C_ALT).make();
|
||||
final List<Allele> A_G_ALT = Arrays.asList(Aref, G, GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
final List<Allele> A_G_ALT = Arrays.asList(Aref, G, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||
final Genotype gA_G_ALT = new GenotypeBuilder("A_G").PL(standardPLs).alleles(noCalls).make();
|
||||
final VariantContext vcA_G_ALT = new VariantContextBuilder(VCbase).alleles(A_G_ALT).genotypes(gA_G_ALT).make();
|
||||
final List<Allele> A_C_G = Arrays.asList(Aref, C, G);
|
||||
final Genotype gA_C_G = new GenotypeBuilder("A_C_G").PL(new int[]{40, 20, 30, 20, 10, 30}).alleles(noCalls).make();
|
||||
final List<Allele> A_C_G_ALT = Arrays.asList(Aref, C, G, GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
final List<Allele> A_C_G_ALT = Arrays.asList(Aref, C, G, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||
final Genotype gA_C_G_ALT = new GenotypeBuilder("A_C_G").PL(new int[]{40, 20, 30, 20, 10, 30, 71, 72, 73, 74}).alleles(noCalls).make();
|
||||
final VariantContext vcA_C_G_ALT = new VariantContextBuilder(VCbase).alleles(A_C_G_ALT).genotypes(gA_C_G_ALT).make();
|
||||
final List<Allele> A_ATC_ALT = Arrays.asList(Aref, ATC, GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
final List<Allele> A_ATC_ALT = Arrays.asList(Aref, ATC, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||
final Genotype gA_ATC_ALT = new GenotypeBuilder("A_ATC").PL(standardPLs).alleles(noCalls).make();
|
||||
final VariantContext vcA_ATC_ALT = new VariantContextBuilder(VCbase).alleles(A_ATC_ALT).genotypes(gA_ATC_ALT).make();
|
||||
final Allele A = Allele.create("A", false);
|
||||
final List<Allele> AA_A_ALT = Arrays.asList(AAref, A, GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
final List<Allele> AA_A_ALT = Arrays.asList(AAref, A, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||
final Genotype gAA_A_ALT = new GenotypeBuilder("AA_A").PL(standardPLs).alleles(noCalls).make();
|
||||
final VariantContext vcAA_A_ALT = new VariantContextBuilder(VCprevBase).alleles(AA_A_ALT).genotypes(gAA_A_ALT).make();
|
||||
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ import htsjdk.variant.vcf.VCFConstants;
|
|||
import htsjdk.variant.vcf.VCFHeader;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
import org.broadinstitute.gatk.utils.Utils;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
|
||||
import org.testng.Assert;
|
||||
|
|
@ -95,7 +96,7 @@ public class GVCFWriterUnitTest extends BaseTest {
|
|||
private List<Integer> standardPartition = Arrays.asList(1, 10, 20);
|
||||
private Allele REF = Allele.create("N", true);
|
||||
private Allele ALT = Allele.create("A");
|
||||
private List<Allele> ALLELES = Arrays.asList(REF, GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
private List<Allele> ALLELES = Arrays.asList(REF, GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||
private final String SAMPLE_NAME = "XXYYZZ";
|
||||
|
||||
@BeforeMethod
|
||||
|
|
@ -268,10 +269,10 @@ public class GVCFWriterUnitTest extends BaseTest {
|
|||
Assert.assertEquals(vc.getStart(), start);
|
||||
Assert.assertEquals(vc.getEnd(), stop);
|
||||
if ( nonRef ) {
|
||||
Assert.assertNotEquals(vc.getAlternateAllele(0), GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
Assert.assertNotEquals(vc.getAlternateAllele(0), GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||
} else {
|
||||
Assert.assertEquals(vc.getNAlleles(), 2);
|
||||
Assert.assertEquals(vc.getAlternateAllele(0), GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
Assert.assertEquals(vc.getAlternateAllele(0), GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||
Assert.assertEquals(vc.getAttributeAsInt(VCFConstants.END_KEY, -1), stop);
|
||||
Assert.assertTrue(vc.hasGenotypes());
|
||||
Assert.assertTrue(vc.hasGenotype(SAMPLE_NAME));
|
||||
|
|
|
|||
|
|
@ -44,8 +44,8 @@
|
|||
<test.listeners>org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.gatk.utils.TestNGTestTransformer,org.broadinstitute.gatk.utils.GATKTextReporter,org.uncommons.reportng.HTMLReporter</test.listeners>
|
||||
|
||||
<!-- Version numbers for picard and htsjdk -->
|
||||
<htsjdk.version>1.123.1658</htsjdk.version>
|
||||
<picard.version>1.123.1620</picard.version>
|
||||
<htsjdk.version>1.127.1690</htsjdk.version>
|
||||
<picard.version>1.127.1667</picard.version>
|
||||
</properties>
|
||||
|
||||
<!-- Dependency configuration (versions, etc.) -->
|
||||
|
|
|
|||
|
|
@ -30,8 +30,6 @@ import htsjdk.variant.variantcontext.Allele;
|
|||
import htsjdk.variant.variantcontext.Genotype;
|
||||
import htsjdk.variant.variantcontext.GenotypesContext;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
|
|
@ -40,6 +38,7 @@ import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnot
|
|||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
|
|
@ -144,15 +143,15 @@ public class AlleleBalance extends InfoFieldAnnotation {
|
|||
|
||||
Map<String, Object> map = new HashMap<>();
|
||||
if ( weightHet > 0.0 ) {
|
||||
map.put("ABHet",ratioHet/weightHet);
|
||||
map.put(GATKVCFConstants.ALLELE_BALANCE_HET_KEY,ratioHet/weightHet);
|
||||
}
|
||||
|
||||
if ( weightHom > 0.0 ) {
|
||||
map.put("ABHom",ratioHom/weightHom);
|
||||
map.put(GATKVCFConstants.ALLELE_BALANCE_HOM_KEY,ratioHom/weightHom);
|
||||
}
|
||||
|
||||
if ( overallNonDiploid > 0.0 ) {
|
||||
map.put("OND",overallNonDiploid);
|
||||
map.put(GATKVCFConstants.NON_DIPLOID_RATIO_KEY,overallNonDiploid);
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
|
@ -210,9 +209,10 @@ public class AlleleBalance extends InfoFieldAnnotation {
|
|||
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList("ABHet","ABHom","OND"); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("ABHet", 1, VCFHeaderLineType.Float, "Allele Balance for heterozygous calls (ref/(ref+alt))"),
|
||||
new VCFInfoHeaderLine("ABHom", 1, VCFHeaderLineType.Float, "Allele Balance for homozygous calls (A/(A+O)) where A is the allele (ref or alt) and O is anything other"),
|
||||
new VCFInfoHeaderLine("OND", 1, VCFHeaderLineType.Float, "Overall non-diploid ratio (alleles/(alleles+non-alleles))")); }
|
||||
@Override
|
||||
public List<String> getKeyNames() {
|
||||
return Arrays.asList(GATKVCFConstants.ALLELE_BALANCE_HET_KEY,
|
||||
GATKVCFConstants.ALLELE_BALANCE_HOM_KEY,
|
||||
GATKVCFConstants.NON_DIPLOID_RATIO_KEY);
|
||||
}
|
||||
}
|
||||
|
|
@ -43,6 +43,8 @@ import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
|||
import org.broadinstitute.gatk.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
|
|
@ -92,7 +94,7 @@ public class AlleleBalanceBySample extends GenotypeAnnotation implements Experim
|
|||
// and isBiallelic() while ignoring the <NON_REF> allele
|
||||
boolean biallelicSNP = vc.isSNP() && vc.isBiallelic();
|
||||
|
||||
if(vc.hasAllele(GVCF_NONREF)){
|
||||
if(vc.hasAllele(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE)){
|
||||
// If we have the GVCF <NON_REF> allele, then the SNP is biallelic
|
||||
// iff there are 3 alleles and both the reference and first alt
|
||||
// allele are length 1.
|
||||
|
|
@ -118,8 +120,6 @@ public class AlleleBalanceBySample extends GenotypeAnnotation implements Experim
|
|||
gb.attribute(getKeyNames().get(0), Double.valueOf(String.format("%.2f", ratio)));
|
||||
}
|
||||
|
||||
private static final Allele GVCF_NONREF = Allele.create("<NON_REF>", false);
|
||||
|
||||
private Double annotateWithPileup(final AlignmentContext stratifiedContext, final VariantContext vc) {
|
||||
|
||||
final HashMap<Byte, Integer> alleleCounts = new HashMap<>();
|
||||
|
|
@ -175,7 +175,7 @@ public class AlleleBalanceBySample extends GenotypeAnnotation implements Experim
|
|||
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList("AB"); }
|
||||
/**
 * Names the single genotype-level key this annotation writes.
 *
 * @return a one-element list holding {@code GATKVCFConstants.ALLELE_BALANCE_KEY}
 */
public List<String> getKeyNames() {
    final String alleleBalanceKey = GATKVCFConstants.ALLELE_BALANCE_KEY;
    return Arrays.asList(alleleBalanceKey);
}
|
||||
|
||||
public List<VCFFormatHeaderLine> getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Allele balance for each het genotype")); }
|
||||
/**
 * Supplies the FORMAT header line for this annotation by looking up its first key name
 * in {@code GATKVCFHeaderLines}.
 *
 * @return a one-element list with the header line registered for the first key
 */
public List<VCFFormatHeaderLine> getDescriptions() {
    final String firstKey = getKeyNames().get(0);
    return Arrays.asList(GATKVCFHeaderLines.getFormatLine(firstKey));
}
|
||||
}
|
||||
|
|
@ -32,9 +32,10 @@ import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompa
|
|||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.gatk.utils.BaseUtils;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
|
|
@ -80,12 +81,12 @@ import java.util.Map;
|
|||
counts[index]++;
|
||||
}
|
||||
}
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
Map<String, Object> map = new HashMap<>();
|
||||
map.put(getKeyNames().get(0), counts);
|
||||
return map;
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList("BaseCounts"); }
|
||||
/**
 * Names the single key under which the per-base counts are stored.
 *
 * @return a one-element list holding {@code GATKVCFConstants.BASE_COUNTS_KEY}
 */
public List<String> getKeyNames() {
    final String baseCountsKey = GATKVCFConstants.BASE_COUNTS_KEY;
    return Arrays.asList(baseCountsKey);
}
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("BaseCounts", 4, VCFHeaderLineType.Integer, "Counts of each base")); }
|
||||
/**
 * Supplies the INFO header line for this annotation by looking up its first key name
 * in {@code GATKVCFHeaderLines}.
 *
 * @return a one-element list with the header line registered for the first key
 */
public List<VCFInfoHeaderLine> getDescriptions() {
    final String firstKey = getKeyNames().get(0);
    return Arrays.asList(GATKVCFHeaderLines.getInfoLine(firstKey));
}
|
||||
}
|
||||
|
|
@ -31,10 +31,11 @@ import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.gatk.utils.pileup.PileupElement;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
|
|
@ -80,12 +81,12 @@ public class LowMQ extends InfoFieldAnnotation {
|
|||
total += 1;
|
||||
}
|
||||
}
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
Map<String, Object> map = new HashMap<>();
|
||||
map.put(getKeyNames().get(0), String.format("%.04f,%.04f,%.00f", mq0/total, mq10/total, total));
|
||||
return map;
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList("LowMQ"); }
|
||||
/**
 * Names the single key under which the low-mapping-quality summary string is stored.
 *
 * @return a one-element list holding {@code GATKVCFConstants.LOW_MQ_KEY}
 */
public List<String> getKeyNames() {
    final String lowMqKey = GATKVCFConstants.LOW_MQ_KEY;
    return Arrays.asList(lowMqKey);
}
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 3, VCFHeaderLineType.Float, "3-tuple: <fraction of reads with MQ=0>,<fraction of reads with MQ<=10>,<total number of reads>")); }
|
||||
/**
 * Supplies the INFO header line for this annotation by looking up its first key name
 * in {@code GATKVCFHeaderLines}.
 *
 * @return a one-element list with the header line registered for the first key
 */
public List<VCFInfoHeaderLine> getDescriptions() {
    final String firstKey = getKeyNames().get(0);
    return Arrays.asList(GATKVCFHeaderLines.getInfoLine(firstKey));
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,12 +33,13 @@ import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.GenotypeAnnota
|
|||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import htsjdk.variant.vcf.VCFConstants;
|
||||
import htsjdk.variant.vcf.VCFFormatHeaderLine;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.gatk.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
|
||||
import htsjdk.variant.variantcontext.Genotype;
|
||||
import htsjdk.variant.variantcontext.GenotypeBuilder;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
|
@ -76,11 +77,9 @@ public class MappingQualityZeroBySample extends GenotypeAnnotation {
|
|||
gb.attribute(getKeyNames().get(0), mq0);
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.MAPPING_QUALITY_ZERO_KEY); }
|
||||
/**
 * Names the single genotype-level key under which the per-sample MQ0 count is stored.
 *
 * @return a one-element list holding {@code GATKVCFConstants.MAPPING_QUALITY_ZERO_BY_SAMPLE_KEY}
 */
public List<String> getKeyNames() {
    final String mq0BySampleKey = GATKVCFConstants.MAPPING_QUALITY_ZERO_BY_SAMPLE_KEY;
    return Arrays.asList(mq0BySampleKey);
}
|
||||
|
||||
public List<VCFFormatHeaderLine> getDescriptions() { return Arrays.asList(
|
||||
new VCFFormatHeaderLine(getKeyNames().get(0), 1,
|
||||
VCFHeaderLineType.Integer, "Number of Mapping Quality Zero Reads per sample")); }
|
||||
/**
 * Supplies the VCF FORMAT header line describing this annotation.
 * The line is looked up in {@link GATKVCFHeaderLines} using the first key from {@link #getKeyNames()}.
 *
 * @return a fixed-size, single-element list holding whatever {@code GATKVCFHeaderLines.getFormatLine} returns for that key
 */
public List<VCFFormatHeaderLine> getDescriptions() {
    final String key = getKeyNames().get(0);
    final VCFFormatHeaderLine headerLine = GATKVCFHeaderLines.getFormatLine(key);
    return Arrays.asList(headerLine);
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,10 +32,11 @@ import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompa
|
|||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.gatk.utils.BaseUtils;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.gatk.utils.pileup.PileupElement;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
|
|
@ -70,7 +71,6 @@ public class NBaseCount extends InfoFieldAnnotation {
|
|||
|
||||
for( final AlignmentContext context : stratifiedContexts.values() ) {
|
||||
for( final PileupElement p : context.getBasePileup()) {
|
||||
final String platform = p.getRead().getReadGroup().getPlatform();
|
||||
if( BaseUtils.isNBase( p.getBase() ) ) {
|
||||
countNBase++;
|
||||
} else if( BaseUtils.isRegularBase( p.getBase() ) ) {
|
||||
|
|
@ -78,12 +78,12 @@ public class NBaseCount extends InfoFieldAnnotation {
|
|||
}
|
||||
}
|
||||
}
|
||||
final Map<String, Object> map = new HashMap<String, Object>();
|
||||
final Map<String, Object> map = new HashMap<>();
|
||||
map.put(getKeyNames().get(0), String.format("%.4f", (double)countNBase / (double)(countNBase + countRegularBase + 1)));
|
||||
return map;
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList("PercentNBase"); }
|
||||
/**
 * Lists the annotation key(s) produced by this annotation.
 *
 * @return a fixed-size, single-element list holding {@link GATKVCFConstants#N_BASE_COUNT_KEY}
 */
public List<String> getKeyNames() {
    final String key = GATKVCFConstants.N_BASE_COUNT_KEY;
    return Arrays.asList(key);
}
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("PercentNBase", 1, VCFHeaderLineType.Float, "Percentage of N bases in the pileup")); }
|
||||
/**
 * Supplies the VCF INFO header line describing this annotation.
 * The line is looked up in {@link GATKVCFHeaderLines} using the first key from {@link #getKeyNames()}.
 *
 * @return a fixed-size, single-element list holding whatever {@code GATKVCFHeaderLines.getInfoLine} returns for that key
 */
public List<VCFInfoHeaderLine> getDescriptions() {
    final String key = getKeyNames().get(0);
    final VCFInfoHeaderLine headerLine = GATKVCFHeaderLines.getInfoLine(key);
    return Arrays.asList(headerLine);
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,7 +31,9 @@ import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
|
|
@ -58,5 +60,11 @@ public abstract class InfoFieldAnnotation extends VariantAnnotatorAnnotation {
|
|||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap);
|
||||
|
||||
// return the descriptions used for the VCF INFO meta field
|
||||
public abstract List<VCFInfoHeaderLine> getDescriptions();
|
||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||
final List<VCFInfoHeaderLine> lines = new ArrayList<>(5);
|
||||
for (final String key : getKeyNames()) {
|
||||
lines.add(GATKVCFHeaderLines.getInfoLine(key));
|
||||
}
|
||||
return lines;
|
||||
}
|
||||
}
|
||||
|
|
@ -41,6 +41,8 @@ import htsjdk.variant.vcf.*;
|
|||
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
|
||||
import htsjdk.variant.variantcontext.*;
|
||||
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -129,25 +131,22 @@ public class BeagleOutputToVCF extends RodWalker<Integer, Integer> {
|
|||
private final double MIN_PROB_ERROR = 0.000001;
|
||||
private final double MAX_GENOTYPE_QUALITY = -6.0;
|
||||
|
||||
private final static String BEAGLE_MONO_FILTER_STRING = "BGL_SET_TO_MONOMORPHIC";
|
||||
private final static String ORIGINAL_ALT_ALLELE_INFO_KEY = "OriginalAltAllele";
|
||||
|
||||
public void initialize() {
|
||||
|
||||
// setup the header fields
|
||||
|
||||
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
final Set<VCFHeaderLine> hInfo = new HashSet<>();
|
||||
hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit()));
|
||||
hInfo.add(new VCFFormatHeaderLine("OG",1, VCFHeaderLineType.String, "Original Genotype input to Beagle"));
|
||||
hInfo.add(new VCFInfoHeaderLine("R2", 1, VCFHeaderLineType.Float, "r2 Value reported by Beagle on each site"));
|
||||
hInfo.add(new VCFInfoHeaderLine("NumGenotypesChanged", 1, VCFHeaderLineType.Integer, "The number of genotypes changed by Beagle"));
|
||||
hInfo.add(new VCFInfoHeaderLine(ORIGINAL_ALT_ALLELE_INFO_KEY, 1, VCFHeaderLineType.String, "The original alt allele for a site set to monomorphic by Beagle"));
|
||||
hInfo.add(new VCFFilterHeaderLine(BEAGLE_MONO_FILTER_STRING, "This site was set to monomorphic by Beagle"));
|
||||
hInfo.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.ORIGINAL_GENOTYPE_KEY));
|
||||
hInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.BEAGLE_R2_KEY));
|
||||
hInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.NUM_GENOTYPES_CHANGED_KEY));
|
||||
hInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.ORIGINAL_ALT_ALLELE_INFO_KEY));
|
||||
hInfo.add(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.BEAGLE_MONO_FILTER_NAME));
|
||||
|
||||
if ( comp.isBound() ) {
|
||||
hInfo.add(new VCFInfoHeaderLine("ACH", 1, VCFHeaderLineType.Integer, "Allele Count from Comparison ROD at this site"));
|
||||
hInfo.add(new VCFInfoHeaderLine("ANH", 1, VCFHeaderLineType.Integer, "Allele Frequency from Comparison ROD at this site"));
|
||||
hInfo.add(new VCFInfoHeaderLine("AFH", 1, VCFHeaderLineType.Float, "Allele Number from Comparison ROD at this site"));
|
||||
hInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.BEAGLE_AC_COMP_KEY));
|
||||
hInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.BEAGLE_AF_COMP_KEY));
|
||||
hInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.BEAGLE_AN_COMP_KEY));
|
||||
}
|
||||
|
||||
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variantCollection.variants.getName()));
|
||||
|
|
@ -237,7 +236,7 @@ public class BeagleOutputToVCF extends RodWalker<Integer, Integer> {
|
|||
|
||||
|
||||
// We have phased genotype in hp. Need to set the isRef field in the allele.
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
List<Allele> alleles = new ArrayList<>();
|
||||
|
||||
String alleleA = beagleGenotypePairs.get(0);
|
||||
String alleleB = beagleGenotypePairs.get(1);
|
||||
|
|
@ -298,7 +297,7 @@ public class BeagleOutputToVCF extends RodWalker<Integer, Integer> {
|
|||
else
|
||||
genotypeQuality = log10(probWrongGenotype);
|
||||
|
||||
HashMap<String,Object> originalAttributes = new HashMap<String,Object>(g.getExtendedAttributes());
|
||||
HashMap<String,Object> originalAttributes = new HashMap<>(g.getExtendedAttributes());
|
||||
|
||||
// get original encoding and add to genotype attributes
|
||||
String a1, a2, og;
|
||||
|
|
@ -320,11 +319,11 @@ public class BeagleOutputToVCF extends RodWalker<Integer, Integer> {
|
|||
|
||||
// See if Beagle switched genotypes
|
||||
if (! originalAlleleA.equals(Allele.NO_CALL) && beagleSwitchedGenotypes(bglAlleleA,originalAlleleA,bglAlleleB,originalAlleleB)){
|
||||
originalAttributes.put("OG",og);
|
||||
originalAttributes.put(GATKVCFConstants.ORIGINAL_GENOTYPE_KEY, og);
|
||||
numGenotypesChangedByBeagle++;
|
||||
}
|
||||
else {
|
||||
originalAttributes.put("OG",".");
|
||||
originalAttributes.put(GATKVCFConstants.ORIGINAL_GENOTYPE_KEY, ".");
|
||||
}
|
||||
Genotype imputedGenotype = new GenotypeBuilder(g).alleles(alleles).log10PError(genotypeQuality).attributes(originalAttributes).phased(genotypeIsPhased).make();
|
||||
if ( imputedGenotype.isHet() || imputedGenotype.isHomVar() ) {
|
||||
|
|
@ -336,8 +335,8 @@ public class BeagleOutputToVCF extends RodWalker<Integer, Integer> {
|
|||
|
||||
final VariantContextBuilder builder = new VariantContextBuilder(vc_input).source("outputvcf").genotypes(genotypes);
|
||||
if ( ! ( beagleVarCounts > 0 || DONT_FILTER_MONOMORPHIC_SITES ) ) {
|
||||
builder.attribute(ORIGINAL_ALT_ALLELE_INFO_KEY, vc_input.getAlternateAllele(0));
|
||||
builder.alleles(Collections.singleton(vc_input.getReference())).filter(BEAGLE_MONO_FILTER_STRING);
|
||||
builder.attribute(GATKVCFConstants.ORIGINAL_ALT_ALLELE_INFO_KEY, vc_input.getAlternateAllele(0));
|
||||
builder.alleles(Collections.singleton(vc_input.getReference())).filter(GATKVCFConstants.BEAGLE_MONO_FILTER_NAME);
|
||||
}
|
||||
|
||||
// re-compute chromosome counts
|
||||
|
|
@ -345,15 +344,15 @@ public class BeagleOutputToVCF extends RodWalker<Integer, Integer> {
|
|||
|
||||
// Get Hapmap AC and AF
|
||||
if (vc_comp != null) {
|
||||
builder.attribute("ACH", alleleCountH.toString() );
|
||||
builder.attribute("ANH", chrCountH.toString() );
|
||||
builder.attribute("AFH", String.format("%4.2f", (double)alleleCountH/chrCountH) );
|
||||
builder.attribute(GATKVCFConstants.BEAGLE_AC_COMP_KEY, alleleCountH.toString() );
|
||||
builder.attribute(GATKVCFConstants.BEAGLE_AN_COMP_KEY, chrCountH.toString() );
|
||||
builder.attribute(GATKVCFConstants.BEAGLE_AF_COMP_KEY, String.format("%4.2f", (double)alleleCountH/chrCountH) );
|
||||
|
||||
}
|
||||
|
||||
builder.attribute("NumGenotypesChanged", numGenotypesChangedByBeagle );
|
||||
builder.attribute(GATKVCFConstants.NUM_GENOTYPES_CHANGED_KEY, numGenotypesChangedByBeagle );
|
||||
if( !beagleR2Feature.getR2value().equals(Double.NaN) ) {
|
||||
builder.attribute("R2", beagleR2Feature.getR2value().toString() );
|
||||
builder.attribute(GATKVCFConstants.BEAGLE_R2_KEY, beagleR2Feature.getR2value().toString() );
|
||||
}
|
||||
|
||||
vcfWriter.add(builder.make());
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ import htsjdk.variant.vcf.VCFConstants;
|
|||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import htsjdk.variant.variantcontext.Allele;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -72,9 +73,9 @@ public class AlleleCount extends VariantStratifier {
|
|||
|
||||
try {
|
||||
if ( eval.isBiallelic() ) {
|
||||
if ( eval.hasAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY) ) {
|
||||
if ( eval.hasAttribute(GATKVCFConstants.MLE_ALLELE_COUNT_KEY) ) {
|
||||
// the MLEAC is allowed to be larger than the AN (e.g. in the case of all PLs being 0, the GT is ./. but the exact model may arbitrarily choose an AC>1)
|
||||
AC = Math.min(eval.getAttributeAsInt(VCFConstants.MLE_ALLELE_COUNT_KEY, 0), nchrom);
|
||||
AC = Math.min(eval.getAttributeAsInt(GATKVCFConstants.MLE_ALLELE_COUNT_KEY, 0), nchrom);
|
||||
} else if ( eval.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) {
|
||||
AC = eval.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY, 0);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ import org.broadinstitute.gatk.utils.variant.ChromosomeCountConstants;
|
|||
import org.broadinstitute.gatk.engine.SampleUtils;
|
||||
import org.broadinstitute.gatk.utils.help.HelpConstants;
|
||||
import org.broadinstitute.gatk.engine.GATKVCFUtils;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
import htsjdk.variant.vcf.*;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
|
|
@ -337,7 +338,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
if ( mergedVC == null )
|
||||
continue;
|
||||
|
||||
if ( mergedVC.hasAllele(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE) )
|
||||
if ( mergedVC.hasAllele(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE) )
|
||||
throw new UserException("CombineVariants should not be used to merge gVCFs produced by the HaplotypeCaller; use CombineGVCFs instead");
|
||||
|
||||
final VariantContextBuilder builder = new VariantContextBuilder(mergedVC);
|
||||
|
|
|
|||
|
|
@ -29,6 +29,8 @@ import htsjdk.samtools.liftover.LiftOver;
|
|||
import htsjdk.samtools.util.Interval;
|
||||
import htsjdk.samtools.SAMFileHeader;
|
||||
import htsjdk.samtools.SAMFileReader;
|
||||
import htsjdk.variant.vcf.VCFHeader;
|
||||
import htsjdk.variant.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.gatk.utils.commandline.Argument;
|
||||
import org.broadinstitute.gatk.utils.commandline.ArgumentCollection;
|
||||
import org.broadinstitute.gatk.utils.commandline.Output;
|
||||
|
|
@ -41,9 +43,10 @@ import org.broadinstitute.gatk.engine.walkers.RodWalker;
|
|||
import org.broadinstitute.gatk.engine.SampleUtils;
|
||||
import org.broadinstitute.gatk.utils.help.HelpConstants;
|
||||
import org.broadinstitute.gatk.engine.GATKVCFUtils;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
import htsjdk.variant.variantcontext.writer.Options;
|
||||
import htsjdk.variant.vcf.*;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
|
|
@ -109,12 +112,12 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
|
|||
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
|
||||
Map<String, VCFHeader> vcfHeaders = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName));
|
||||
|
||||
Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
|
||||
Set<VCFHeaderLine> metaData = new HashSet<>();
|
||||
if ( vcfHeaders.containsKey(trackName) )
|
||||
metaData.addAll(vcfHeaders.get(trackName).getMetaDataInSortedOrder());
|
||||
if ( RECORD_ORIGINAL_LOCATION ) {
|
||||
metaData.add(new VCFInfoHeaderLine("OriginalChr", 1, VCFHeaderLineType.String, "Original contig name for the record"));
|
||||
metaData.add(new VCFInfoHeaderLine("OriginalStart", 1, VCFHeaderLineType.Integer, "Original start position for the record"));
|
||||
metaData.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.ORIGINAL_CONTIG_KEY));
|
||||
metaData.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.ORIGINAL_START_KEY));
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -140,8 +143,8 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
if ( RECORD_ORIGINAL_LOCATION ) {
|
||||
vc = new VariantContextBuilder(vc)
|
||||
.attribute("OriginalChr", fromInterval.getSequence())
|
||||
.attribute("OriginalStart", fromInterval.getStart()).make();
|
||||
.attribute(GATKVCFConstants.ORIGINAL_CONTIG_KEY, fromInterval.getSequence())
|
||||
.attribute(GATKVCFConstants.ORIGINAL_START_KEY, fromInterval.getStart()).make();
|
||||
}
|
||||
|
||||
if ( originalVC.isSNP() && originalVC.isBiallelic() && GATKVariantContextUtils.getSNPSubstitutionType(originalVC) != GATKVariantContextUtils.getSNPSubstitutionType(vc) ) {
|
||||
|
|
|
|||
|
|
@ -39,6 +39,8 @@ import org.broadinstitute.gatk.engine.SampleUtils;
|
|||
import org.broadinstitute.gatk.utils.Utils;
|
||||
import org.broadinstitute.gatk.utils.help.HelpConstants;
|
||||
import org.broadinstitute.gatk.engine.GATKVCFUtils;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
import htsjdk.variant.vcf.*;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
|
|
@ -422,9 +424,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
headerLines.add(new VCFHeaderLine("source", "SelectVariants"));
|
||||
|
||||
if (KEEP_ORIGINAL_CHR_COUNTS) {
|
||||
headerLines.add(new VCFInfoHeaderLine("AC_Orig", VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Original AC"));
|
||||
headerLines.add(new VCFInfoHeaderLine("AF_Orig", VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Original AF"));
|
||||
headerLines.add(new VCFInfoHeaderLine("AN_Orig", 1, VCFHeaderLineType.Integer, "Original AN"));
|
||||
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.ORIGINAL_AC_KEY));
|
||||
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.ORIGINAL_AF_KEY));
|
||||
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.ORIGINAL_AN_KEY));
|
||||
}
|
||||
headerLines.addAll(Arrays.asList(ChromosomeCountConstants.descriptions));
|
||||
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY));
|
||||
|
|
@ -695,8 +697,8 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
|
||||
// if we have fewer samples in the selected VC than in the original VC, we need to strip out the MLE tags
|
||||
if ( vc.getNSamples() != sub.getNSamples() ) {
|
||||
builder.rmAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY);
|
||||
builder.rmAttribute(VCFConstants.MLE_ALLELE_FREQUENCY_KEY);
|
||||
builder.rmAttribute(GATKVCFConstants.MLE_ALLELE_COUNT_KEY);
|
||||
builder.rmAttribute(GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY);
|
||||
}
|
||||
|
||||
// Remove a fraction of the genotypes if needed
|
||||
|
|
@ -763,11 +765,11 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
}
|
||||
|
||||
if ( originalVC.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) )
|
||||
builder.attribute("AC_Orig", getReorderedAttributes(originalVC.getAttribute(VCFConstants.ALLELE_COUNT_KEY), indexOfOriginalAlleleForNewAllele));
|
||||
builder.attribute(GATKVCFConstants.ORIGINAL_AC_KEY, getReorderedAttributes(originalVC.getAttribute(VCFConstants.ALLELE_COUNT_KEY), indexOfOriginalAlleleForNewAllele));
|
||||
if ( originalVC.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) )
|
||||
builder.attribute("AF_Orig", getReorderedAttributes(originalVC.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY), indexOfOriginalAlleleForNewAllele));
|
||||
builder.attribute(GATKVCFConstants.ORIGINAL_AF_KEY, getReorderedAttributes(originalVC.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY), indexOfOriginalAlleleForNewAllele));
|
||||
if ( originalVC.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY) )
|
||||
builder.attribute("AN_Orig", originalVC.getAttribute(VCFConstants.ALLELE_NUMBER_KEY));
|
||||
builder.attribute(GATKVCFConstants.ORIGINAL_AN_KEY, originalVC.getAttribute(VCFConstants.ALLELE_NUMBER_KEY));
|
||||
}
|
||||
|
||||
VariantContextUtils.calculateChromosomeCounts(builder, false);
|
||||
|
|
@ -786,7 +788,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
}
|
||||
|
||||
if ( sawDP )
|
||||
builder.attribute("DP", depth);
|
||||
builder.attribute(VCFConstants.DEPTH_KEY, depth);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ import org.broadinstitute.gatk.utils.downsampling.AlleleBiasedDownsamplingUtils;
|
|||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
import org.broadinstitute.gatk.utils.Utils;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -1081,13 +1081,13 @@ public class ReadLikelihoods<A extends Allele> implements SampleList, AlleleList
|
|||
* @param nonRefAllele the non-ref allele.
|
||||
*
|
||||
* @throws IllegalArgumentException if {@code nonRefAllele} is anything but the designated <NON_REF>
|
||||
* symbolic allele {@link GATKVariantContextUtils#NON_REF_SYMBOLIC_ALLELE}.
|
||||
* symbolic allele {@link org.broadinstitute.gatk.utils.variant.GATKVCFConstants#NON_REF_SYMBOLIC_ALLELE}.
|
||||
*/
|
||||
public void addNonReferenceAllele(final A nonRefAllele) {
|
||||
|
||||
if (nonRefAllele == null)
|
||||
throw new IllegalArgumentException("non-ref allele cannot be null");
|
||||
if (!nonRefAllele.equals(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE))
|
||||
if (!nonRefAllele.equals(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE))
|
||||
throw new IllegalArgumentException("the non-ref allele is not valid");
|
||||
// Already present?
|
||||
if (alleles.alleleIndex(nonRefAllele) != -1)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,127 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.utils.variant;
|
||||
|
||||
import htsjdk.variant.variantcontext.Allele;
|
||||
|
||||
/**
|
||||
* This class contains any constants (primarily FORMAT/INFO keys) in VCF files used by the GATK.
|
||||
* Note that VCF-standard constants are in VCFConstants, in htsjdk. Keys in header lines should
|
||||
* have matching entries in GATKVCFHeaderLines
|
||||
*/
|
||||
public final class GATKVCFConstants {
|
||||
|
||||
//INFO keys
|
||||
public static final String ALLELE_BALANCE_HET_KEY = "ABHet";
|
||||
public static final String ALLELE_BALANCE_HOM_KEY = "ABHom";
|
||||
public static final String ORIGINAL_AC_KEY = "AC_Orig"; //SelectVariants
|
||||
public static final String BEAGLE_AC_COMP_KEY = "ACH"; //BeagleOutputToVCF
|
||||
public static final String ORIGINAL_AF_KEY = "AF_Orig"; //SelectVariants
|
||||
public static final String BEAGLE_AF_COMP_KEY = "AFH"; //BeagleOutputToVCF
|
||||
public static final String ORIGINAL_AN_KEY = "AN_Orig"; //SelectVariants
|
||||
public static final String BEAGLE_AN_COMP_KEY = "ANH"; //BeagleOutputToVCF
|
||||
public static final String BASE_COUNTS_KEY = "BaseCounts";
|
||||
public static final String BASE_QUAL_RANK_SUM_KEY = "BaseQRankSum";
|
||||
public static final String GENOTYPE_AND_VALIDATE_STATUS_KEY = "callStatus";
|
||||
public static final String CLIPPING_RANK_SUM_KEY = "ClippingRankSum";
|
||||
public static final String CULPRIT_KEY = "culprit";
|
||||
public static final String SPANNING_DELETIONS_KEY = "Dels";
|
||||
public static final String DOWNSAMPLED_KEY = "DS";
|
||||
public static final String FISHER_STRAND_KEY = "FS";
|
||||
public static final String GC_CONTENT_KEY = "GC";
|
||||
public static final String GQ_MEAN_KEY = "GQ_MEAN";
|
||||
public static final String GQ_STDEV_KEY = "GQ_STDDEV";
|
||||
public static final String HAPLOTYPE_SCORE_KEY = "HaplotypeScore";
|
||||
public static final String HI_CONF_DENOVO_KEY = "hiConfDeNovo";
|
||||
public static final String HOMOPOLYMER_RUN_KEY = "HRun";
|
||||
public static final String HARDY_WEINBERG_KEY = "HW";
|
||||
public static final String AVG_INTERVAL_DP_KEY = "IDP"; //DiagnoseTargets
|
||||
public static final String INTERVAL_GC_CONTENT_KEY = "IGC";
|
||||
public static final String INBREEDING_COEFFICIENT_KEY = "InbreedingCoeff";
|
||||
public static final String LIKELIHOOD_RANK_SUM_KEY = "LikelihoodRankSum";
|
||||
public static final String LO_CONF_DENOVO_KEY = "loConfDeNovo";
|
||||
public static final String LOW_MQ_KEY = "LowMQ";
|
||||
public static final String MLE_ALLELE_COUNT_KEY = "MLEAC";
|
||||
public static final String MLE_ALLELE_FREQUENCY_KEY = "MLEAF";
|
||||
public static final String MLE_PER_SAMPLE_ALLELE_COUNT_KEY = "MLPSAC";
|
||||
public static final String MLE_PER_SAMPLE_ALLELE_FRACTION_KEY = "MLPSAF";
|
||||
public static final String MAP_QUAL_RANK_SUM_KEY = "MQRankSum";
|
||||
public static final String MENDEL_VIOLATION_LR_KEY = "MVLR";
|
||||
public static final String NOCALL_CHROM_KEY = "NCC";
|
||||
public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY = "NDA";
|
||||
public static final String NEGATIVE_LABEL_KEY = "NEGATIVE_TRAIN_SITE";
|
||||
public static final String NUM_GENOTYPES_CHANGED_KEY = "NumGenotypesChanged"; //BeagleOutputToVCF
|
||||
public static final String NON_DIPLOID_RATIO_KEY = "OND";
|
||||
public static final String ORIGINAL_ALT_ALLELE_INFO_KEY = "OriginalAltAllele"; //BeagleOutputToVCF
|
||||
public static final String ORIGINAL_CONTIG_KEY = "OriginalChr"; //LiftoverVariants
|
||||
public static final String ORIGINAL_START_KEY = "OriginalStart"; //LiftoverVariants
|
||||
public static final String N_BASE_COUNT_KEY = "PercentNBase";
|
||||
public static final String RBP_INCONSISTENT_KEY = "PhasingInconsistent"; //ReadBackedPhasing
|
||||
public static final String GENOTYPE_PRIOR_KEY = "PG";
|
||||
public static final String POSITIVE_LABEL_KEY = "POSITIVE_TRAIN_SITE";
|
||||
public static final String QUAL_BY_DEPTH_KEY = "QD";
|
||||
public static final String BEAGLE_R2_KEY = "R2"; //BeagleOutputToVCF
|
||||
public static final String READ_POS_RANK_SUM_KEY = "ReadPosRankSum";
|
||||
public static final String REFSAMPLE_DEPTH_KEY = "REFDEPTH";
|
||||
public static final String REPEATS_PER_ALLELE_KEY = "RPA";
|
||||
public static final String REPEAT_UNIT_KEY = "RU";
|
||||
public static final String SAMPLE_LIST_KEY = "Samples";
|
||||
public static final String STRAND_ODDS_RATIO_KEY = "SOR";
|
||||
public static final String STR_PRESENT_KEY = "STR";
|
||||
public static final String TRANSMISSION_DISEQUILIBRIUM_KEY = "TDT";
|
||||
public static final String VARIANT_TYPE_KEY = "VariantType";
|
||||
public static final String VQS_LOD_KEY = "VQSLOD";
|
||||
|
||||
//FORMAT keys
|
||||
public static final String ALLELE_BALANCE_KEY = "AB";
|
||||
public static final String PL_FOR_ALL_SNP_ALLELES_KEY = "APL";
|
||||
public static final String RBP_HAPLOTYPE_KEY = "HP"; //ReadBackedPhasing
|
||||
public static final String AVG_INTERVAL_DP_BY_SAMPLE_KEY = "IDP"; //DiagnoseTargets
|
||||
public static final String JOINT_LIKELIHOOD_TAG_NAME = "JL"; //FamilyLikelihoodsUtils
|
||||
public static final String JOINT_POSTERIOR_TAG_NAME = "JP"; //FamilyLikelihoodsUtils
|
||||
public static final String LOW_COVERAGE_LOCI = "LL"; //DiagnoseTargets
|
||||
public final static String MIN_DP_FORMAT_KEY = "MIN_DP";
|
||||
public static final String MAPPING_QUALITY_ZERO_BY_SAMPLE_KEY = "MQ0";
|
||||
public static final String ORIGINAL_GENOTYPE_KEY = "OG"; //BeagleOutputToVCF
|
||||
public static final String HAPLOTYPE_CALLER_PHASING_GT_KEY = "PGT";
|
||||
public static final String HAPLOTYPE_CALLER_PHASING_ID_KEY = "PID";
|
||||
public static final String PHRED_SCALED_POSTERIORS_KEY = "PP"; //FamilyLikelihoodsUtils / PosteriorLikelihoodsUtils
|
||||
public static final String STRAND_COUNT_BY_SAMPLE_KEY = "SAC";
|
||||
public static final String STRAND_BIAS_BY_SAMPLE_KEY = "SB";
|
||||
public final static String TRANSMISSION_PROBABILITY_KEY = "TP"; //PhaseByTransmission
|
||||
public static final String ZERO_COVERAGE_LOCI = "ZL"; //DiagnoseTargets
|
||||
|
||||
//FILTERS
|
||||
/* Note that many filters used throughout GATK (most notably in VariantRecalibration) are dynamic,
|
||||
their names (or descriptions) depend on some threshold. Those filters are not included here
|
||||
*/
|
||||
public static final String BEAGLE_MONO_FILTER_NAME = "BGL_SET_TO_MONOMORPHIC";
|
||||
public static final String LOW_QUAL_FILTER_NAME = "LowQual";
|
||||
|
||||
//Alleles
|
||||
public final static String NON_REF_SYMBOLIC_ALLELE_NAME = "NON_REF";
|
||||
public final static Allele NON_REF_SYMBOLIC_ALLELE = Allele.create("<"+NON_REF_SYMBOLIC_ALLELE_NAME+">", false); // represents any possible non-ref allele at this site
|
||||
}
|
||||
|
|
@ -0,0 +1,149 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.utils.variant;
|
||||
|
||||
import htsjdk.variant.vcf.VCFFilterHeaderLine;
|
||||
import htsjdk.variant.vcf.VCFFormatHeaderLine;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineCount;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import static org.broadinstitute.gatk.utils.variant.GATKVCFConstants.*;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* This class contains the VCFHeaderLine definitions for the annotation keys in GATKVCFConstants.
|
||||
* VCF-standard header lines are in VCFStandardHeaderLines, in htsjdk
|
||||
*/
|
||||
public class GATKVCFHeaderLines {
|
||||
|
||||
/**
 * Looks up the GATK INFO header line registered under the given ID.
 *
 * @param id the INFO key to look up
 * @return the header line stored for {@code id}, or {@code null} if none is registered
 */
public static VCFInfoHeaderLine getInfoLine(final String id) {
    final VCFInfoHeaderLine line = infoLines.get(id);
    return line;
}
|
||||
/**
 * Looks up the GATK FORMAT header line registered under the given ID.
 *
 * @param id the FORMAT key to look up
 * @return the header line stored for {@code id}, or {@code null} if none is registered
 */
public static VCFFormatHeaderLine getFormatLine(final String id) {
    final VCFFormatHeaderLine line = formatLines.get(id);
    return line;
}
|
||||
/**
 * Looks up the GATK FILTER header line registered under the given ID.
 *
 * @param id the filter name to look up
 * @return the header line stored for {@code id}, or {@code null} if none is registered
 */
public static VCFFilterHeaderLine getFilterLine(final String id) {
    final VCFFilterHeaderLine line = filterLines.get(id);
    return line;
}
|
||||
|
||||
private static Map<String, VCFInfoHeaderLine> infoLines = new HashMap<>(60);
|
||||
private static Map<String, VCFFormatHeaderLine> formatLines = new HashMap<>(25);
|
||||
private static Map<String, VCFFilterHeaderLine> filterLines = new HashMap<>(2);
|
||||
|
||||
private static void addFormatLine(final VCFFormatHeaderLine line) {
|
||||
formatLines.put(line.getID(), line);
|
||||
}
|
||||
|
||||
private static void addInfoLine(final VCFInfoHeaderLine line) {
|
||||
infoLines.put(line.getID(), line);
|
||||
}
|
||||
|
||||
private static void addFilterLine(final VCFFilterHeaderLine line) {
|
||||
filterLines.put(line.getID(), line);
|
||||
}
|
||||
|
||||
static {
|
||||
addFilterLine(new VCFFilterHeaderLine(LOW_QUAL_FILTER_NAME, "Low quality"));
|
||||
addFilterLine(new VCFFilterHeaderLine(BEAGLE_MONO_FILTER_NAME, "This site was set to monomorphic by Beagle"));
|
||||
|
||||
addFormatLine(new VCFFormatHeaderLine(ALLELE_BALANCE_KEY, 1, VCFHeaderLineType.Float, "Allele balance for each het genotype"));
|
||||
addFormatLine(new VCFFormatHeaderLine(MAPPING_QUALITY_ZERO_BY_SAMPLE_KEY, 1, VCFHeaderLineType.Integer, "Number of Mapping Quality Zero Reads per sample"));
|
||||
addFormatLine(new VCFFormatHeaderLine(MLE_PER_SAMPLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the alternate allele count, in the same order as listed, for each individual sample"));
|
||||
addFormatLine(new VCFFormatHeaderLine(MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the alternate allele fraction, in the same order as listed, for each individual sample"));
|
||||
addFormatLine(new VCFFormatHeaderLine(STRAND_COUNT_BY_SAMPLE_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Number of reads on the forward and reverse strand supporting each allele (including reference)"));
|
||||
addFormatLine(new VCFFormatHeaderLine(STRAND_BIAS_BY_SAMPLE_KEY, 4, VCFHeaderLineType.Integer, "Per-sample component statistics which comprise the Fisher's Exact Test to detect strand bias."));
|
||||
addFormatLine(new VCFFormatHeaderLine(MLE_PER_SAMPLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the alternate allele count, in the same order as listed, for each individual sample"));
|
||||
addFormatLine(new VCFFormatHeaderLine(MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the alternate allele fraction, in the same order as listed, for each individual sample"));
|
||||
addFormatLine(new VCFFormatHeaderLine(PL_FOR_ALL_SNP_ALLELES_KEY, 10, VCFHeaderLineType.Integer, "Phred-scaled genotype likelihoods for all 4 possible bases regardless of whether there is statistical evidence for them. Ordering is always PL for AA AC CC GA GC GG TA TC TG TT."));
|
||||
addFormatLine(new VCFFormatHeaderLine(HAPLOTYPE_CALLER_PHASING_ID_KEY, 1, VCFHeaderLineType.String, "Physical phasing ID information, where each unique ID within a given sample (but not across samples) connects records within a phasing group"));
|
||||
addFormatLine(new VCFFormatHeaderLine(HAPLOTYPE_CALLER_PHASING_GT_KEY, 1, VCFHeaderLineType.String, "Physical phasing haplotype information, describing how the alternate alleles are phased in relation to one another"));
|
||||
addFormatLine(new VCFFormatHeaderLine(MIN_DP_FORMAT_KEY, 1, VCFHeaderLineType.Integer, "Minimum DP observed within the GVCF block"));
|
||||
addFormatLine(new VCFFormatHeaderLine(TRANSMISSION_PROBABILITY_KEY, 1, VCFHeaderLineType.Integer, "Phred score of the genotype combination and phase given that the genotypes are correct"));
|
||||
addFormatLine(new VCFFormatHeaderLine(RBP_HAPLOTYPE_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Read-backed phasing haplotype identifiers"));
|
||||
addFormatLine(new VCFFormatHeaderLine(AVG_INTERVAL_DP_BY_SAMPLE_KEY, 1, VCFHeaderLineType.Float, "Average sample depth across the interval. Sum of the sample specific depth in all loci divided by interval size."));
|
||||
addFormatLine(new VCFFormatHeaderLine(LOW_COVERAGE_LOCI, 1, VCFHeaderLineType.Integer, "Number of loci for this sample, in this interval with low coverage (below the minimum coverage) but not zero."));
|
||||
addFormatLine(new VCFFormatHeaderLine(ZERO_COVERAGE_LOCI, 1, VCFHeaderLineType.Integer, "Number of loci for this sample, in this interval with zero coverage."));
|
||||
addFormatLine(new VCFFormatHeaderLine(PHRED_SCALED_POSTERIORS_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Phred-scaled Posterior Genotype Probabilities"));
|
||||
addFormatLine(new VCFFormatHeaderLine(JOINT_LIKELIHOOD_TAG_NAME, 1, VCFHeaderLineType.Integer, "Phred-scaled joint likelihood of the genotype combination (before applying family priors)"));
|
||||
addFormatLine(new VCFFormatHeaderLine(JOINT_POSTERIOR_TAG_NAME, 1, VCFHeaderLineType.Integer, "Phred-scaled joint posterior probability of the genotype combination (after applying family priors)"));
|
||||
addFormatLine(new VCFFormatHeaderLine(ORIGINAL_GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Original Genotype input to Beagle"));
|
||||
|
||||
addInfoLine(new VCFInfoHeaderLine(MLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed"));
|
||||
addInfoLine(new VCFInfoHeaderLine(MLE_ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed"));
|
||||
addInfoLine(new VCFInfoHeaderLine(DOWNSAMPLED_KEY, 0, VCFHeaderLineType.Flag, "Were any of the samples downsampled?"));
|
||||
addInfoLine(new VCFInfoHeaderLine(ALLELE_BALANCE_HET_KEY, 1, VCFHeaderLineType.Float, "Allele Balance for heterozygous calls (ref/(ref+alt))"));
|
||||
addInfoLine(new VCFInfoHeaderLine(ALLELE_BALANCE_HOM_KEY, 1, VCFHeaderLineType.Float, "Allele Balance for homozygous calls (A/(A+O)) where A is the allele (ref or alt) and O is anything other"));
|
||||
addInfoLine(new VCFInfoHeaderLine(NON_DIPLOID_RATIO_KEY, 1, VCFHeaderLineType.Float, "Overall non-diploid ratio (alleles/(alleles+non-alleles))"));
|
||||
addInfoLine(new VCFInfoHeaderLine(BASE_COUNTS_KEY, 4, VCFHeaderLineType.Integer, "Counts of each base"));
|
||||
addInfoLine(new VCFInfoHeaderLine(LOW_MQ_KEY, 3, VCFHeaderLineType.Float, "3-tuple: <fraction of reads with MQ=0>,<fraction of reads with MQ<=10>,<total number of reads>"));
|
||||
addInfoLine(new VCFInfoHeaderLine(N_BASE_COUNT_KEY, 1, VCFHeaderLineType.Float, "Percentage of N bases in the pileup"));
|
||||
addInfoLine(new VCFInfoHeaderLine(BASE_QUAL_RANK_SUM_KEY, 1, VCFHeaderLineType.Float, "Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities"));
|
||||
addInfoLine(new VCFInfoHeaderLine(CLIPPING_RANK_SUM_KEY, 1, VCFHeaderLineType.Float, "Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases"));
|
||||
addInfoLine(new VCFInfoHeaderLine(FISHER_STRAND_KEY, 1, VCFHeaderLineType.Float, "Phred-scaled p-value using Fisher's exact test to detect strand bias"));
|
||||
addInfoLine(new VCFInfoHeaderLine(GC_CONTENT_KEY, 1, VCFHeaderLineType.Integer, "GC content around the variant (see docs for window size details)"));
|
||||
addInfoLine(new VCFInfoHeaderLine(NOCALL_CHROM_KEY, 1, VCFHeaderLineType.Integer, "Number of no-called samples"));
|
||||
addInfoLine(new VCFInfoHeaderLine(GQ_MEAN_KEY, 1, VCFHeaderLineType.Float, "Mean of all GQ values"));
|
||||
addInfoLine(new VCFInfoHeaderLine(GQ_STDEV_KEY, 1, VCFHeaderLineType.Float, "Standard deviation of all GQ values"));
|
||||
addInfoLine(new VCFInfoHeaderLine(HAPLOTYPE_SCORE_KEY, 1, VCFHeaderLineType.Float, "Consistency of the site with at most two segregating haplotypes"));
|
||||
addInfoLine(new VCFInfoHeaderLine(HARDY_WEINBERG_KEY, 1, VCFHeaderLineType.Float, "Phred-scaled p-value for Hardy-Weinberg violation"));
|
||||
addInfoLine(new VCFInfoHeaderLine(HOMOPOLYMER_RUN_KEY, 1, VCFHeaderLineType.Integer, "Largest Contiguous Homopolymer Run of Variant Allele In Either Direction"));
|
||||
addInfoLine(new VCFInfoHeaderLine(INBREEDING_COEFFICIENT_KEY, 1, VCFHeaderLineType.Float, "Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation"));
|
||||
addInfoLine(new VCFInfoHeaderLine(LIKELIHOOD_RANK_SUM_KEY, 1, VCFHeaderLineType.Float, "Z-score from Wilcoxon rank sum test of Alt Vs. Ref haplotype likelihoods"));
|
||||
addInfoLine(new VCFInfoHeaderLine(MAP_QUAL_RANK_SUM_KEY, 1, VCFHeaderLineType.Float, "Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities"));
|
||||
addInfoLine(new VCFInfoHeaderLine(MENDEL_VIOLATION_LR_KEY, 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]"));
|
||||
addInfoLine(new VCFInfoHeaderLine(HI_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "High confidence possible de novo mutation (GQ >= 20 for all trio members)=[comma-delimited list of child samples]"));
|
||||
addInfoLine(new VCFInfoHeaderLine(LO_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "Low confidence possible de novo mutation (GQ >= 10 for child, GQ > 0 for parents)=[comma-delimited list of child samples]"));
|
||||
addInfoLine(new VCFInfoHeaderLine(QUAL_BY_DEPTH_KEY, 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth"));
|
||||
addInfoLine(new VCFInfoHeaderLine(READ_POS_RANK_SUM_KEY, 1, VCFHeaderLineType.Float, "Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias"));
|
||||
addInfoLine(new VCFInfoHeaderLine(SAMPLE_LIST_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "List of polymorphic samples"));
|
||||
addInfoLine(new VCFInfoHeaderLine(SPANNING_DELETIONS_KEY, 1, VCFHeaderLineType.Float, "Fraction of Reads Containing Spanning Deletions"));
|
||||
addInfoLine(new VCFInfoHeaderLine(STRAND_ODDS_RATIO_KEY, 1, VCFHeaderLineType.Float, "Symmetric Odds Ratio of 2x2 contingency table to detect strand bias"));
|
||||
addInfoLine(new VCFInfoHeaderLine(STR_PRESENT_KEY, 0, VCFHeaderLineType.Flag, "Variant is a short tandem repeat"));
|
||||
addInfoLine(new VCFInfoHeaderLine(REPEAT_UNIT_KEY, 1, VCFHeaderLineType.String, "Tandem repeat unit (bases)"));
|
||||
addInfoLine(new VCFInfoHeaderLine(REPEATS_PER_ALLELE_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Number of times tandem repeat unit is repeated, for each allele (including reference)"));
|
||||
addInfoLine(new VCFInfoHeaderLine(TRANSMISSION_DISEQUILIBRIUM_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Test statistic from Wittkowski transmission disequilibrium test."));
|
||||
addInfoLine(new VCFInfoHeaderLine(VARIANT_TYPE_KEY, 1, VCFHeaderLineType.String, "Variant type description"));
|
||||
addInfoLine(new VCFInfoHeaderLine(NUMBER_OF_DISCOVERED_ALLELES_KEY, 1, VCFHeaderLineType.Integer, "Number of alternate alleles discovered (but not necessarily genotyped) at this site"));
|
||||
addInfoLine(new VCFInfoHeaderLine(REFSAMPLE_DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Total reference sample depth"));
|
||||
addInfoLine(new VCFInfoHeaderLine(ORIGINAL_AC_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Original AC"));
|
||||
addInfoLine(new VCFInfoHeaderLine(ORIGINAL_AF_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Original AF"));
|
||||
addInfoLine(new VCFInfoHeaderLine(ORIGINAL_AN_KEY, 1, VCFHeaderLineType.Integer, "Original AN"));
|
||||
addInfoLine(new VCFInfoHeaderLine(ORIGINAL_CONTIG_KEY, 1, VCFHeaderLineType.String, "Original contig name for the record"));
|
||||
addInfoLine(new VCFInfoHeaderLine(ORIGINAL_START_KEY, 1, VCFHeaderLineType.Integer, "Original start position for the record"));
|
||||
addInfoLine(new VCFInfoHeaderLine(VQS_LOD_KEY, 1, VCFHeaderLineType.Float, "Log odds ratio of being a true variant versus being false under the trained gaussian mixture model"));
|
||||
addInfoLine(new VCFInfoHeaderLine(CULPRIT_KEY, 1, VCFHeaderLineType.String, "The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out"));
|
||||
addInfoLine(new VCFInfoHeaderLine(POSITIVE_LABEL_KEY, 1, VCFHeaderLineType.Flag, "This variant was used to build the positive training set of good variants"));
|
||||
addInfoLine(new VCFInfoHeaderLine(NEGATIVE_LABEL_KEY, 1, VCFHeaderLineType.Flag, "This variant was used to build the negative training set of bad variants"));
|
||||
addInfoLine(new VCFInfoHeaderLine(RBP_INCONSISTENT_KEY, 0, VCFHeaderLineType.Flag, "Are the reads significantly haplotype-inconsistent?"));
|
||||
addInfoLine(new VCFInfoHeaderLine(GENOTYPE_AND_VALIDATE_STATUS_KEY, 1, VCFHeaderLineType.String, "Value from the validation VCF"));
|
||||
addInfoLine(new VCFInfoHeaderLine(AVG_INTERVAL_DP_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
|
||||
addInfoLine(new VCFInfoHeaderLine(INTERVAL_GC_CONTENT_KEY, 1, VCFHeaderLineType.Float, "GC Content of the interval"));
|
||||
addInfoLine(new VCFInfoHeaderLine(GENOTYPE_PRIOR_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Genotype Likelihood Prior"));
|
||||
addInfoLine(new VCFInfoHeaderLine(BEAGLE_R2_KEY, 1, VCFHeaderLineType.Float, "r2 Value reported by Beagle on each site"));
|
||||
addInfoLine(new VCFInfoHeaderLine(NUM_GENOTYPES_CHANGED_KEY, 1, VCFHeaderLineType.Integer, "The number of genotypes changed by Beagle"));
|
||||
addInfoLine(new VCFInfoHeaderLine(ORIGINAL_ALT_ALLELE_INFO_KEY, 1, VCFHeaderLineType.String, "The original alt allele for a site set to monomorphic by Beagle"));
|
||||
addInfoLine(new VCFInfoHeaderLine(BEAGLE_AC_COMP_KEY, 1, VCFHeaderLineType.Integer, "Allele Count from Comparison ROD at this site"));
|
||||
addInfoLine(new VCFInfoHeaderLine(BEAGLE_AF_COMP_KEY, 1, VCFHeaderLineType.Integer, "Allele Frequency from Comparison ROD at this site"));
|
||||
addInfoLine(new VCFInfoHeaderLine(BEAGLE_AN_COMP_KEY, 1, VCFHeaderLineType.Float, "Allele Number from Comparison ROD at this site"));
|
||||
}
|
||||
}
|
||||
|
|
@ -55,9 +55,6 @@ public class GATKVariantContextUtils {
|
|||
@Deprecated
|
||||
public final static List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||
|
||||
public final static String NON_REF_SYMBOLIC_ALLELE_NAME = "NON_REF";
|
||||
public final static Allele NON_REF_SYMBOLIC_ALLELE = Allele.create("<"+NON_REF_SYMBOLIC_ALLELE_NAME+">", false); // represents any possible non-ref allele at this site
|
||||
|
||||
public final static String MERGE_FILTER_PREFIX = "filterIn";
|
||||
public final static String MERGE_REF_IN_ALL = "ReferenceInAll";
|
||||
public final static String MERGE_FILTER_IN_ALL = "FilteredInAll";
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -3,13 +3,13 @@
|
|||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>picard</groupId>
|
||||
<artifactId>picard</artifactId>
|
||||
<version>1.123.1620</version>
|
||||
<version>1.127.1667</version>
|
||||
<name>picard</name>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>samtools</groupId>
|
||||
<artifactId>htsjdk</artifactId>
|
||||
<version>1.123.1658</version>
|
||||
<version>1.127.1690</version>
|
||||
</dependency>
|
||||
<!-- TODO: Picard is using a custom zip with just ant's BZip2 classes. See also: http://www.kohsuke.org/bzip2 -->
|
||||
<dependency>
|
||||
Binary file not shown.
|
|
@ -3,7 +3,7 @@
|
|||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>samtools</groupId>
|
||||
<artifactId>htsjdk</artifactId>
|
||||
<version>1.123.1658</version>
|
||||
<version>1.127.1690</version>
|
||||
<name>htsjdk</name>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
Loading…
Reference in New Issue