Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
61fcbcb190
|
|
@ -2,6 +2,7 @@ library(gsalib)
|
|||
library(ggplot2)
|
||||
library(gplots)
|
||||
library(tools)
|
||||
library(reshape)
|
||||
|
||||
#
|
||||
# Standard command line switch. Can be loaded interactively for development
|
||||
|
|
|
|||
|
|
@ -59,6 +59,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
|||
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
|
|
@ -221,6 +222,10 @@ public class GenomeAnalysisEngine {
|
|||
if (this.getArguments().nonDeterministicRandomSeed)
|
||||
resetRandomGenerator(System.currentTimeMillis());
|
||||
|
||||
// TODO -- REMOVE ME WHEN WE STOP BCF testing
|
||||
if ( this.getArguments().USE_SLOW_GENOTYPES )
|
||||
GenotypeBuilder.MAKE_FAST_BY_DEFAULT = false;
|
||||
|
||||
// if the user specified an input BQSR recalibration table then enable on-the-fly recalibration
|
||||
if (this.getArguments().BQSR_RECAL_FILE != null)
|
||||
setBaseRecalibration(this.getArguments().BQSR_RECAL_FILE, this.getArguments().quantizationLevels);
|
||||
|
|
|
|||
|
|
@ -336,6 +336,11 @@ public class GATKArgumentCollection {
|
|||
public boolean generateShadowBCF = false;
|
||||
// TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed
|
||||
|
||||
@Argument(fullName="useSlowGenotypes",shortName = "useSlowGenotypes",doc="",required=false)
|
||||
@Hidden
|
||||
public boolean USE_SLOW_GENOTYPES = false;
|
||||
// TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed
|
||||
|
||||
/**
|
||||
* The file pointed to by this argument must be a VCF file. The GATK will read in just the header of this file
|
||||
* and then use the INFO, FORMAT, and FILTER field values from this file to repair the header file of any other
|
||||
|
|
|
|||
|
|
@ -38,10 +38,7 @@ import org.broadinstitute.sting.utils.variantcontext.writer.Options;
|
|||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.io.PrintStream;
|
||||
import java.io.*;
|
||||
import java.util.Arrays;
|
||||
import java.util.EnumSet;
|
||||
import java.util.List;
|
||||
|
|
@ -58,6 +55,8 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
|
|||
*/
|
||||
private static Logger logger = Logger.getLogger(VariantContextWriterStorage.class);
|
||||
|
||||
private final static int BUFFER_SIZE = 1048576;
|
||||
|
||||
protected final File file;
|
||||
protected OutputStream stream;
|
||||
protected final VariantContextWriter writer;
|
||||
|
|
@ -93,7 +92,7 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
|
|||
if ( stub.isCompressed() )
|
||||
stream = new BlockCompressedOutputStream(file);
|
||||
else
|
||||
stream = new PrintStream(file);
|
||||
stream = new PrintStream(new BufferedOutputStream(new FileOutputStream(file), BUFFER_SIZE));
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new UserException.CouldNotCreateOutputFile(file, "Unable to open target output stream", ex);
|
||||
|
|
|
|||
|
|
@ -51,6 +51,8 @@ import java.util.List;
|
|||
* @version 0.1
|
||||
*/
|
||||
public class VariantContextWriterStub implements Stub<VariantContextWriter>, VariantContextWriter {
|
||||
public final static boolean UPDATE_CONTIG_HEADERS = true;
|
||||
|
||||
/**
|
||||
* The engine, central to the GATK's processing.
|
||||
*/
|
||||
|
|
@ -215,7 +217,8 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
|
|||
vcfHeader.addMetaDataLine(commandLineArgHeaderLine);
|
||||
}
|
||||
|
||||
//vcfHeader = VCFUtils.withUpdatedContigs(vcfHeader, engine);
|
||||
if ( UPDATE_CONTIG_HEADERS )
|
||||
vcfHeader = VCFUtils.withUpdatedContigs(vcfHeader, engine);
|
||||
}
|
||||
|
||||
outputTracker.getStorage(this).writeHeader(vcfHeader);
|
||||
|
|
|
|||
|
|
@ -251,7 +251,7 @@ public class VariantContextAdaptors {
|
|||
|
||||
Map<String, Object> attributes = new HashMap<String, Object>();
|
||||
Collection<Genotype> genotypes = new ArrayList<Genotype>();
|
||||
Genotype call = new Genotype(name, genotypeAlleles);
|
||||
Genotype call = GenotypeBuilder.create(name, genotypeAlleles);
|
||||
|
||||
// add the call to the genotype list, and then use this list to create a VariantContext
|
||||
genotypes.add(call);
|
||||
|
|
@ -344,7 +344,7 @@ public class VariantContextAdaptors {
|
|||
alleles.add(allele2);
|
||||
}
|
||||
|
||||
Genotype g = new Genotype(samples[i], myAlleles);
|
||||
Genotype g = GenotypeBuilder.create(samples[i], myAlleles);
|
||||
genotypes.add(g);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
|
|||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -21,15 +22,12 @@ import java.util.*;
|
|||
*/
|
||||
public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
|
||||
public void annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, final GenotypeBuilder gb) {
|
||||
Double ratio = annotateSNP(stratifiedContext, vc, g);
|
||||
if (ratio == null)
|
||||
return null;
|
||||
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%.2f", ratio.doubleValue()));
|
||||
return map;
|
||||
return;
|
||||
|
||||
gb.attribute(getKeyNames().get(0), Double.valueOf(String.format("%.2f", ratio.doubleValue())));
|
||||
}
|
||||
|
||||
private Double annotateSNP(AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
|
|
@ -14,6 +15,7 @@ import org.broadinstitute.sting.utils.pileup.PileupElement;
|
|||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -44,22 +46,20 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
|||
|
||||
private static final String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
|
||||
public void annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, GenotypeBuilder gb) {
|
||||
if ( g == null || !g.isCalled() )
|
||||
return null;
|
||||
return;
|
||||
|
||||
if ( vc.isSNP() )
|
||||
return annotateSNP(stratifiedContext, vc);
|
||||
if ( vc.isIndel() )
|
||||
return annotateIndel(stratifiedContext, vc);
|
||||
|
||||
return null;
|
||||
annotateSNP(stratifiedContext, vc, gb);
|
||||
else if ( vc.isIndel() )
|
||||
annotateIndel(stratifiedContext, vc, gb);
|
||||
}
|
||||
|
||||
private Map<String,Object> annotateSNP(AlignmentContext stratifiedContext, VariantContext vc) {
|
||||
private void annotateSNP(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) {
|
||||
|
||||
if ( ! stratifiedContext.hasBasePileup() )
|
||||
return null;
|
||||
return;
|
||||
|
||||
HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
|
||||
for ( Allele allele : vc.getAlleles() )
|
||||
|
|
@ -72,22 +72,21 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
|||
}
|
||||
|
||||
// we need to add counts in the correct order
|
||||
Integer[] counts = new Integer[alleleCounts.size()];
|
||||
int[] counts = new int[alleleCounts.size()];
|
||||
counts[0] = alleleCounts.get(vc.getReference().getBases()[0]);
|
||||
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
|
||||
counts[i+1] = alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]);
|
||||
|
||||
return toADAnnotation(counts);
|
||||
gb.AD(counts);
|
||||
}
|
||||
|
||||
private Map<String,Object> annotateIndel(AlignmentContext stratifiedContext, VariantContext vc) {
|
||||
|
||||
private void annotateIndel(AlignmentContext stratifiedContext, VariantContext vc, GenotypeBuilder gb) {
|
||||
if ( ! stratifiedContext.hasBasePileup() )
|
||||
return null;
|
||||
return;
|
||||
|
||||
ReadBackedPileup pileup = stratifiedContext.getBasePileup();
|
||||
if ( pileup == null )
|
||||
return null;
|
||||
return;
|
||||
|
||||
final HashMap<String, Integer> alleleCounts = new HashMap<String, Integer>();
|
||||
alleleCounts.put(REF_ALLELE, 0);
|
||||
|
|
@ -123,16 +122,12 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
|||
}
|
||||
}
|
||||
|
||||
Integer[] counts = new Integer[alleleCounts.size()];
|
||||
int[] counts = new int[alleleCounts.size()];
|
||||
counts[0] = alleleCounts.get(REF_ALLELE);
|
||||
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
|
||||
counts[i+1] = alleleCounts.get( getAlleleRepresentation(vc.getAlternateAllele(i)) );
|
||||
|
||||
return toADAnnotation(counts);
|
||||
}
|
||||
|
||||
private final Map<String, Object> toADAnnotation(final Integer[] counts) {
|
||||
return Collections.singletonMap(getKeyNames().get(0), (Object)Arrays.asList(counts));
|
||||
gb.AD(counts);
|
||||
}
|
||||
|
||||
private String getAlleleRepresentation(Allele allele) {
|
||||
|
|
@ -145,7 +140,7 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
|||
}
|
||||
|
||||
// public String getIndelBases()
|
||||
public List<String> getKeyNames() { return Arrays.asList("AD"); }
|
||||
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.GENOTYPE_ALLELE_DEPTHS); }
|
||||
|
||||
public List<VCFFormatHeaderLine> getDescriptions() {
|
||||
return Arrays.asList(
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
|||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
|
@ -47,10 +48,11 @@ import java.util.Map;
|
|||
* Count for each sample of mapping quality zero reads
|
||||
*/
|
||||
public class MappingQualityZeroBySample extends GenotypeAnnotation {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker,
|
||||
AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext context, VariantContext vc, Genotype g) {
|
||||
public void annotate(RefMetaDataTracker tracker,
|
||||
AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext context,
|
||||
VariantContext vc, Genotype g, GenotypeBuilder gb) {
|
||||
if ( g == null || !g.isCalled() )
|
||||
return null;
|
||||
return;
|
||||
|
||||
int mq0 = 0;
|
||||
if ( context.hasBasePileup() ) {
|
||||
|
|
@ -60,9 +62,8 @@ public class MappingQualityZeroBySample extends GenotypeAnnotation {
|
|||
mq0++;
|
||||
}
|
||||
}
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%d", mq0));
|
||||
return map;
|
||||
|
||||
gb.attribute(getKeyNames().get(0), mq0);
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.MAPPING_QUALITY_ZERO_KEY); }
|
||||
|
|
|
|||
|
|
@ -261,24 +261,22 @@ public class VariantAnnotatorEngine {
|
|||
}
|
||||
|
||||
private GenotypesContext annotateGenotypes(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( requestedGenotypeAnnotations.size() == 0 )
|
||||
if ( requestedGenotypeAnnotations.isEmpty() )
|
||||
return vc.getGenotypes();
|
||||
|
||||
GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples());
|
||||
final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples());
|
||||
for ( final Genotype genotype : vc.getGenotypes() ) {
|
||||
AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
||||
|
||||
if ( context == null ) {
|
||||
genotypes.add(genotype);
|
||||
continue;
|
||||
} else {
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(genotype);
|
||||
for ( final GenotypeAnnotation annotation : requestedGenotypeAnnotations ) {
|
||||
annotation.annotate(tracker, walker, ref, context, vc, genotype, gb);
|
||||
}
|
||||
genotypes.add(gb.make());
|
||||
}
|
||||
|
||||
Map<String, Object> genotypeAnnotations = new HashMap<String, Object>(genotype.getAttributes());
|
||||
for ( GenotypeAnnotation annotation : requestedGenotypeAnnotations ) {
|
||||
Map<String, Object> result = annotation.annotate(tracker, walker, ref, context, vc, genotype);
|
||||
if ( result != null )
|
||||
genotypeAnnotations.putAll(result);
|
||||
}
|
||||
genotypes.add(new Genotype(genotype.getSampleName(), genotype.getAlleles(), genotype.getLog10PError(), genotype.getFilters(), genotypeAnnotations, genotype.isPhased()));
|
||||
}
|
||||
|
||||
return genotypes;
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.List;
|
||||
|
|
@ -13,8 +14,9 @@ import java.util.Map;
|
|||
public abstract class GenotypeAnnotation extends VariantAnnotatorAnnotation {
|
||||
|
||||
// return annotations for the given contexts/genotype split by sample
|
||||
public abstract Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker,
|
||||
ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g);
|
||||
public abstract void annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker,
|
||||
ReferenceContext ref, AlignmentContext stratifiedContext,
|
||||
VariantContext vc, Genotype g, GenotypeBuilder gb );
|
||||
|
||||
// return the descriptions used for the VCF FORMAT meta field
|
||||
public abstract List<VCFFormatHeaderLine> getDescriptions();
|
||||
|
|
|
|||
|
|
@ -204,8 +204,6 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
for ( final Genotype g : vc_input.getGenotypes() ) {
|
||||
Set<String> filters = new LinkedHashSet<String>(g.getFilters());
|
||||
|
||||
boolean genotypeIsPhased = true;
|
||||
String sample = g.getSampleName();
|
||||
|
||||
|
|
@ -271,7 +269,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
// Compute new GQ field = -10*log10Pr(Genotype call is wrong)
|
||||
// Beagle gives probability that genotype is AA, AB and BB.
|
||||
// Which, by definition, are prob of hom ref, het and hom var.
|
||||
Double probWrongGenotype, genotypeQuality;
|
||||
double probWrongGenotype, genotypeQuality;
|
||||
Double homRefProbability = Double.valueOf(beagleProbabilities.get(0));
|
||||
Double hetProbability = Double.valueOf(beagleProbabilities.get(1));
|
||||
Double homVarProbability = Double.valueOf(beagleProbabilities.get(2));
|
||||
|
|
@ -300,7 +298,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
else
|
||||
genotypeQuality = log10(probWrongGenotype);
|
||||
|
||||
HashMap<String,Object> originalAttributes = new HashMap<String,Object>(g.getAttributes());
|
||||
HashMap<String,Object> originalAttributes = new HashMap<String,Object>(g.getExtendedAttributes());
|
||||
|
||||
// get original encoding and add to genotype attributes
|
||||
String a1, a2, og;
|
||||
|
|
@ -328,7 +326,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
else {
|
||||
originalAttributes.put("OG",".");
|
||||
}
|
||||
Genotype imputedGenotype = new Genotype(g.getSampleName(), alleles, genotypeQuality, filters,originalAttributes , genotypeIsPhased);
|
||||
Genotype imputedGenotype = new GenotypeBuilder(g).alleles(alleles).log10PError(genotypeQuality).attributes(originalAttributes).phased(genotypeIsPhased).make();
|
||||
if ( imputedGenotype.isHet() || imputedGenotype.isHomVar() ) {
|
||||
beagleVarCounts++;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,10 +36,7 @@ import org.broadinstitute.sting.utils.SampleUtils;
|
|||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -260,7 +257,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
VariantContextBuilder vcb = new VariantContextBuilder("DiagnoseTargets", interval.getContig(), interval.getStart(), interval.getStart(), alleles);
|
||||
|
||||
vcb = vcb.log10PError(VariantContext.NO_LOG10_PERROR); // QUAL field makes no sense in our VCF
|
||||
vcb.filters(statusesToStrings(stats.callableStatuses(thresholds)));
|
||||
vcb.filters(new HashSet<String>(statusesToStrings(stats.callableStatuses(thresholds))));
|
||||
|
||||
attributes.put(VCFConstants.END_KEY, interval.getStop());
|
||||
attributes.put(VCFConstants.DEPTH_KEY, stats.averageCoverage());
|
||||
|
|
@ -270,21 +267,20 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
System.out.printf("Output -- Interval: %s, Coverage: %.2f%n", stats.getInterval(), stats.averageCoverage());
|
||||
}
|
||||
for (String sample : samples) {
|
||||
Map<String, Object> infos = new HashMap<String, Object>();
|
||||
SampleStatistics sampleStat = stats.getSample(sample);
|
||||
infos.put(VCFConstants.DEPTH_KEY, sampleStat.averageCoverage());
|
||||
infos.put("Q1", sampleStat.getQuantileDepth(0.25));
|
||||
infos.put("MED", sampleStat.getQuantileDepth(0.50));
|
||||
infos.put("Q3", sampleStat.getQuantileDepth(0.75));
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(sample);
|
||||
|
||||
Set<String> filters = new HashSet<String>();
|
||||
filters.addAll(statusesToStrings(stats.getSample(sample).getCallableStatuses(thresholds)));
|
||||
SampleStatistics sampleStat = stats.getSample(sample);
|
||||
gb.DP((int)sampleStat.averageCoverage());
|
||||
gb.attribute("Q1", sampleStat.getQuantileDepth(0.25));
|
||||
gb.attribute("MED", sampleStat.getQuantileDepth(0.50));
|
||||
gb.attribute("Q3", sampleStat.getQuantileDepth(0.75));
|
||||
|
||||
if (debug) {
|
||||
System.out.printf("Found %d bad mates out of %d reads %n", sampleStat.getnBadMates(), sampleStat.getnReads());
|
||||
}
|
||||
gb.filters(statusesToStrings(stats.getSample(sample).getCallableStatuses(thresholds)));
|
||||
|
||||
genotypes.add(new Genotype(sample, null, VariantContext.NO_LOG10_PERROR, filters, infos, false));
|
||||
genotypes.add(gb.make());
|
||||
}
|
||||
vcb = vcb.genotypes(genotypes);
|
||||
|
||||
|
|
@ -299,8 +295,8 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
* @param statuses the set of statuses to be converted
|
||||
* @return a matching set of strings
|
||||
*/
|
||||
private Set<String> statusesToStrings(Set<CallableStatus> statuses) {
|
||||
Set<String> output = new HashSet<String>(statuses.size());
|
||||
private List<String> statusesToStrings(Set<CallableStatus> statuses) {
|
||||
List<String> output = new ArrayList<String>(statuses.size());
|
||||
|
||||
for (CallableStatus status : statuses)
|
||||
output.add(status.name());
|
||||
|
|
|
|||
|
|
@ -79,14 +79,12 @@ class SampleStatistics {
|
|||
* @return the callable statuses of the entire sample
|
||||
*/
|
||||
public Set<CallableStatus> getCallableStatuses(ThresHolder thresholds) {
|
||||
Set<CallableStatus> output = new HashSet<CallableStatus>();
|
||||
|
||||
// We check if reads are present to prevent division-by-zero exceptions
|
||||
if (nReads == 0) {
|
||||
output.add(CallableStatus.NO_READS);
|
||||
return output;
|
||||
return Collections.singleton(CallableStatus.NO_READS);
|
||||
}
|
||||
|
||||
Set<CallableStatus> output = new HashSet<CallableStatus>();
|
||||
Map<CallableStatus, Double> totals = new HashMap<CallableStatus, Double>(CallableStatus.values().length);
|
||||
|
||||
// initialize map
|
||||
|
|
@ -126,6 +124,7 @@ class SampleStatistics {
|
|||
if (output.isEmpty()) {
|
||||
output.add(CallableStatus.PASS);
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -55,8 +55,6 @@ public class BAMDiffableReader implements DiffableReader {
|
|||
|
||||
int count = 0;
|
||||
while ( iterator.hasNext() ) {
|
||||
if ( count++ > maxElementsToRead && maxElementsToRead != -1)
|
||||
break;
|
||||
final SAMRecord record = iterator.next();
|
||||
|
||||
// name is the read name + first of pair
|
||||
|
|
@ -88,6 +86,9 @@ public class BAMDiffableReader implements DiffableReader {
|
|||
if ( ! root.hasElement(name) )
|
||||
// protect ourselves from malformed files
|
||||
root.add(readRoot);
|
||||
count += readRoot.size();
|
||||
if ( count > maxElementsToRead && maxElementsToRead != -1)
|
||||
break;
|
||||
}
|
||||
|
||||
reader.close();
|
||||
|
|
|
|||
|
|
@ -147,7 +147,7 @@ public class DiffEngine {
|
|||
* @param diffs the list of differences to summarize
|
||||
*/
|
||||
public void reportSummarizedDifferences(List<Difference> diffs, SummaryReportParams params ) {
|
||||
printSummaryReport(summarizedDifferencesOfPaths(diffs, params.maxRawDiffsToSummarize), params );
|
||||
printSummaryReport(summarizedDifferencesOfPaths(diffs, params.doPairwise, params.maxRawDiffsToSummarize), params );
|
||||
}
|
||||
|
||||
final protected static String[] diffNameToPath(String diffName) {
|
||||
|
|
@ -161,9 +161,17 @@ public class DiffEngine {
|
|||
diffs.add(new Difference(diff));
|
||||
}
|
||||
|
||||
return summarizedDifferencesOfPaths(diffs, -1);
|
||||
return summarizedDifferencesOfPaths(diffs, true, -1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes a minimum set of potential differences between all singleton differences
|
||||
* in singletonDiffs. Employs an expensive pairwise O(n^2) algorithm.
|
||||
*
|
||||
* @param singletonDiffs
|
||||
* @param maxRawDiffsToSummarize
|
||||
* @return
|
||||
*/
|
||||
private Map<String, Difference> initialPairwiseSummaries(final List<? extends Difference> singletonDiffs,
|
||||
final int maxRawDiffsToSummarize) {
|
||||
Map<String, Difference> summaries = new HashMap<String, Difference>();
|
||||
|
|
@ -191,9 +199,41 @@ public class DiffEngine {
|
|||
return summaries;
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the possible leaf differences among the singleton diffs.
|
||||
*
|
||||
* The leaf differences are all of the form *.*...*.X where all internal
|
||||
* differences are wildcards and the only summarized difference considered
|
||||
* interesting to compute is the final (leaf) element X.
|
||||
*
|
||||
* @param singletonDiffs
|
||||
* @param maxRawDiffsToSummarize
|
||||
* @return
|
||||
*/
|
||||
// Builds the starting map of candidate summaries used when pairwise summarization is not requested.
// For every singleton difference it derives one summarized path, wraps it in a new Difference whose
// count is reset to 0, and offers it to the map; counting happens later in the caller.
private Map<String, Difference> initialLeafSummaries(final List<? extends Difference> singletonDiffs,
|
||||
final int maxRawDiffsToSummarize) {
|
||||
Map<String, Difference> summaries = new HashMap<String, Difference>();
|
||||

||||
// create the initial set of differences
|
||||
for ( final Difference d : singletonDiffs ) {
|
||||
// NOTE(review): presumably summarizedPath(parts, 1) keeps only the last path element and
// wildcards the rest (the *.*...*.X form) -- confirm against summarizedPath
final String path = summarizedPath(d.getParts(), 1);
|
||||
Difference sumDiff = new Difference(path, d.getMaster(), d.getTest());
|
||||
// start at zero: this entry is only a candidate summary at this point
sumDiff.setCount(0);
|
||||
// NOTE(review): presumably a no-op when a summary for this path already exists -- confirm
addSummaryIfMissing(summaries, sumDiff);
|
||||

||||
// -1 disables the cap. Otherwise stop as soon as the map holds MORE than the cap; because the
// test is a strict '>' made after the add, the returned map can exceed the cap by one entry.
if ( maxRawDiffsToSummarize != -1 && summaries.size() > maxRawDiffsToSummarize)
|
||||
return summaries;
|
||||
}
|
||||

||||
return summaries;
|
||||
}
|
||||
|
||||
protected List<Difference> summarizedDifferencesOfPaths(final List<? extends Difference> singletonDiffs,
|
||||
final boolean doPairwise,
|
||||
final int maxRawDiffsToSummarize) {
|
||||
Map<String, Difference> summaries = initialPairwiseSummaries(singletonDiffs, maxRawDiffsToSummarize);
|
||||
final Map<String, Difference> summaries = doPairwise
|
||||
? initialPairwiseSummaries(singletonDiffs, maxRawDiffsToSummarize)
|
||||
: initialLeafSummaries(singletonDiffs, maxRawDiffsToSummarize);
|
||||
|
||||
// count differences
|
||||
for ( Difference diffPath : singletonDiffs ) {
|
||||
|
|
@ -372,18 +412,21 @@ public class DiffEngine {
|
|||
final int maxCountOneItems;
|
||||
final int minSumDiffToShow;
|
||||
final int maxRawDiffsToSummarize;
|
||||
final boolean doPairwise;
|
||||
boolean descending = true;
|
||||
|
||||
public SummaryReportParams(PrintStream out,
|
||||
int maxItemsToDisplay,
|
||||
int maxCountOneItems,
|
||||
int minSumDiffToShow,
|
||||
int maxRawDiffsToSummarize) {
|
||||
int maxRawDiffsToSummarize,
|
||||
final boolean doPairwise) {
|
||||
this.out = out;
|
||||
this.maxItemsToDisplay = maxItemsToDisplay;
|
||||
this.maxCountOneItems = maxCountOneItems;
|
||||
this.minSumDiffToShow = minSumDiffToShow;
|
||||
this.maxRawDiffsToSummarize = maxRawDiffsToSummarize;
|
||||
this.doPairwise = doPairwise;
|
||||
}
|
||||
|
||||
public void setDescending(boolean descending) {
|
||||
|
|
|
|||
|
|
@ -111,21 +111,21 @@ import java.util.List;
|
|||
* <p>
|
||||
*
|
||||
* <pre>
|
||||
[testng] path count
|
||||
[testng] *.*.*.AC 6
|
||||
[testng] *.*.*.AF 6
|
||||
[testng] *.*.*.AN 6
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000000.AC 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000000.AF 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000000.AN 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000117.AC 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000117.AF 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000117.AN 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000211.AC 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000211.AF 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000211.AN 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000598.AC 1
|
||||
</pre>
|
||||
[testng] path count
|
||||
[testng] *.*.*.AC 6
|
||||
[testng] *.*.*.AF 6
|
||||
[testng] *.*.*.AN 6
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000000.AC 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000000.AF 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000000.AN 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000117.AC 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000117.AF 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000117.AN 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000211.AC 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000211.AF 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000211.AN 1
|
||||
[testng] 64b991fd3850f83614518f7d71f0532f.integrationtest.20:10000598.AC 1
|
||||
</pre>
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 7/4/11
|
||||
|
|
@ -165,6 +165,8 @@ public class DiffObjectsWalker extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName="maxRawDiffsToSummary", shortName="maxRawDiffsToSummary", doc="Max. number of objects to read from the files. -1 [default] means unlimited", required=false)
|
||||
int maxRawDiffsToSummary = -1;
|
||||
|
||||
@Argument(fullName="doPairwise", shortName="doPairwise", doc="If provided, we will compute the minimum pairwise differences to summary, which can be extremely expensive", required=false)
|
||||
boolean doPairwise = false;
|
||||
|
||||
/**
|
||||
* The max number of differences to display when summarizing. For example, if there are 10M differences, but
|
||||
|
|
@ -199,11 +201,14 @@ public class DiffObjectsWalker extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName="showItemizedDifferences", shortName="SID", doc="Should we enumerate all differences between the files?", required=false)
|
||||
boolean showItemizedDifferences = false;
|
||||
|
||||
@Argument(fullName="iterations", doc="Number of iterations to perform, should be 1 unless you are doing memory testing", required=false)
|
||||
int iterations = 1;
|
||||
|
||||
DiffEngine diffEngine;
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
this.diffEngine = new DiffEngine();
|
||||
this.diffEngine = new DiffEngine();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -223,29 +228,39 @@ public class DiffObjectsWalker extends RodWalker<Integer, Integer> {
|
|||
|
||||
@Override
|
||||
public void onTraversalDone(Integer sum) {
|
||||
//out.printf("Reading master file %s%n", masterFile);
|
||||
DiffElement master = diffEngine.createDiffableFromFile(masterFile, MAX_OBJECTS_TO_READ);
|
||||
logger.info(String.format("Read %d objects", master.size()));
|
||||
//out.printf("Reading test file %s%n", testFile);
|
||||
DiffElement test = diffEngine.createDiffableFromFile(testFile, MAX_OBJECTS_TO_READ);
|
||||
logger.info(String.format("Read %d objects", test.size()));
|
||||
if ( iterations > 1 ) {
|
||||
for ( int i = 0; i < iterations; i++ ) {
|
||||
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, 20, 10, 0, -1, false);
|
||||
boolean success = DiffEngine.simpleDiffFiles(masterFile, testFile, MAX_OBJECTS_TO_READ, params);
|
||||
logger.info("Iteration " + i + " success " + success);
|
||||
}
|
||||
} else {
|
||||
//out.printf("Reading master file %s%n", masterFile);
|
||||
DiffElement master = diffEngine.createDiffableFromFile(masterFile, MAX_OBJECTS_TO_READ);
|
||||
logger.info(String.format("Read %d objects", master.size()));
|
||||
//out.printf("Reading test file %s%n", testFile);
|
||||
DiffElement test = diffEngine.createDiffableFromFile(testFile, MAX_OBJECTS_TO_READ);
|
||||
logger.info(String.format("Read %d objects", test.size()));
|
||||
|
||||
// out.printf("Master diff objects%n");
|
||||
// out.println(master.toString());
|
||||
// out.printf("Test diff objects%n");
|
||||
// out.println(test.toString());
|
||||
|
||||
List<Difference> diffs = diffEngine.diff(master, test);
|
||||
logger.info(String.format("Done computing diff with %d differences found", diffs.size()));
|
||||
if ( showItemizedDifferences ) {
|
||||
out.printf("Itemized results%n");
|
||||
for ( Difference diff : diffs )
|
||||
out.printf("DIFF: %s%n", diff.toString());
|
||||
}
|
||||
List<Difference> diffs = diffEngine.diff(master, test);
|
||||
logger.info(String.format("Done computing diff with %d differences found", diffs.size()));
|
||||
if ( showItemizedDifferences ) {
|
||||
out.printf("Itemized results%n");
|
||||
for ( Difference diff : diffs )
|
||||
out.printf("DIFF: %s%n", diff.toString());
|
||||
}
|
||||
|
||||
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out, MAX_DIFFS, MAX_COUNT1_DIFFS, minCountForDiff, maxRawDiffsToSummary);
|
||||
params.setDescending(false);
|
||||
diffEngine.reportSummarizedDifferences(diffs, params);
|
||||
logger.info(String.format("Done summarizing differences"));
|
||||
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(out,
|
||||
MAX_DIFFS, MAX_COUNT1_DIFFS, minCountForDiff,
|
||||
maxRawDiffsToSummary, doPairwise);
|
||||
params.setDescending(false);
|
||||
diffEngine.reportSummarizedDifferences(diffs, params);
|
||||
logger.info(String.format("Done summarizing differences"));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,11 +29,13 @@ import org.broad.tribble.AbstractFeatureReader;
|
|||
import org.broad.tribble.FeatureReader;
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broad.tribble.readers.LineReader;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
||||
|
|
@ -79,9 +81,6 @@ public class VCFDiffableReader implements DiffableReader {
|
|||
String prevName = "";
|
||||
Iterator<VariantContext> it = reader.iterator();
|
||||
while ( it.hasNext() ) {
|
||||
if ( count++ > maxElementsToRead && maxElementsToRead != -1)
|
||||
break;
|
||||
|
||||
VariantContext vc = it.next();
|
||||
String name = vc.getChr() + ":" + vc.getStart();
|
||||
if ( name.equals(prevName) ) {
|
||||
|
|
@ -109,9 +108,12 @@ public class VCFDiffableReader implements DiffableReader {
|
|||
for (Genotype g : vc.getGenotypes() ) {
|
||||
DiffNode gRoot = DiffNode.empty(g.getSampleName(), vcRoot);
|
||||
gRoot.add("GT", g.getGenotypeString());
|
||||
gRoot.add("GQ", g.hasLog10PError() ? g.getLog10PError() * -10 : VCFConstants.MISSING_VALUE_v4 );
|
||||
if ( g.hasGQ() ) gRoot.add("GQ", g.getGQ() );
|
||||
if ( g.hasDP() ) gRoot.add("DP", g.getDP() );
|
||||
if ( g.hasAD() ) gRoot.add("AD", Utils.join(",", g.getAD()));
|
||||
if ( g.hasPL() ) gRoot.add("PL", Utils.join(",", g.getPL()));
|
||||
|
||||
for (Map.Entry<String, Object> attribute : g.getAttributes().entrySet()) {
|
||||
for (Map.Entry<String, Object> attribute : g.getExtendedAttributes().entrySet()) {
|
||||
if ( ! attribute.getKey().startsWith("_") )
|
||||
gRoot.add(attribute.getKey(), attribute.getValue());
|
||||
}
|
||||
|
|
@ -120,6 +122,9 @@ public class VCFDiffableReader implements DiffableReader {
|
|||
}
|
||||
|
||||
root.add(vcRoot);
|
||||
count += vcRoot.size();
|
||||
if ( count > maxElementsToRead && maxElementsToRead != -1)
|
||||
break;
|
||||
}
|
||||
|
||||
reader.close();
|
||||
|
|
|
|||
|
|
@ -297,13 +297,14 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
// for each genotype, check filters then create a new object
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
if ( g.isCalled() ) {
|
||||
Set<String> filters = new LinkedHashSet<String>(g.getFilters());
|
||||
List<String> filters = new ArrayList<String>(g.getFilters());
|
||||
|
||||
for ( VariantContextUtils.JexlVCMatchExp exp : genotypeFilterExps ) {
|
||||
if ( VariantContextUtils.match(vc, g, exp) )
|
||||
filters.add(exp.name);
|
||||
}
|
||||
genotypes.add(new Genotype(g.getSampleName(), g.getAlleles(), g.getLog10PError(), filters, g.getAttributes(), g.isPhased()));
|
||||
|
||||
genotypes.add(new GenotypeBuilder(g).filters(filters).make());
|
||||
} else {
|
||||
genotypes.add(g);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -141,13 +141,11 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
if (context.hasBasePileup()) {
|
||||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
if (pileup != null) {
|
||||
final GenotypeBuilder b = new GenotypeBuilder(sample.getKey());
|
||||
final double[] genotypeLikelihoods = pairModel.computeDiploidReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
|
||||
final GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(genotypeLikelihoods);
|
||||
|
||||
final HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||
attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(pileup));
|
||||
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
|
||||
genotypes.add(new Genotype(sample.getKey(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
|
||||
b.PL(genotypeLikelihoods);
|
||||
b.DP(getFilteredDepth(pileup));
|
||||
genotypes.add(b.make());
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.format("Sample:%s Alleles:%s GL:", sample.getKey(), alleleList.toString());
|
||||
|
|
|
|||
|
|
@ -158,12 +158,11 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
myLikelihoods[i] = allLikelihoods[PLordering[i]];
|
||||
|
||||
// normalize in log space so that max element is zero.
|
||||
final GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(myLikelihoods, false, true));
|
||||
|
||||
final HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||
attributes.put(VCFConstants.DEPTH_KEY, sampleData.depth);
|
||||
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
|
||||
genotypes.add(new Genotype(sampleData.name, noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(sampleData.name);
|
||||
final double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(myLikelihoods, false, true);
|
||||
gb.PL(genotypeLikelihoods);
|
||||
gb.DP(sampleData.depth);
|
||||
genotypes.add(gb.make());
|
||||
}
|
||||
|
||||
return builder.genotypes(genotypes).make();
|
||||
|
|
|
|||
|
|
@ -617,7 +617,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
throw new StingException("Read "+read.getReadName()+" from "+getToolkit().getSourceFileForReaderID(getToolkit().getReaderIDForRead(read))+
|
||||
"has no Normal/Tumor tag associated with it");
|
||||
|
||||
// String rg = (String)read.getAttribute("RG");
|
||||
// String rg = (String)read.getExtendedAttribute("RG");
|
||||
// if ( rg == null )
|
||||
// throw new UserException.MalformedBam(read, "Read "+read.getReadName()+" has no read group in merged stream. RG is required for somatic calls.");
|
||||
|
||||
|
|
@ -1148,13 +1148,12 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
|
||||
GenotypesContext genotypes = GenotypesContext.create();
|
||||
for ( String sample : normalSamples ) {
|
||||
|
||||
Map<String,Object> attrs = call.makeStatsAttributes(null);
|
||||
|
||||
if ( ! discard_event ) // we made a call - put actual het genotype here:
|
||||
genotypes.add(new Genotype(sample,alleles,Genotype.NO_LOG10_PERROR,null,attrs,false));
|
||||
else // no call: genotype is ref/ref (but alleles still contain the alt if we observed anything at all)
|
||||
genotypes.add(new Genotype(sample, homref_alleles,Genotype.NO_LOG10_PERROR,null,attrs,false));
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(sample);
|
||||
gb.attributes(call.makeStatsAttributes(null));
|
||||
gb.alleles(! discard_event
|
||||
? alleles // we made a call - put actual het genotype here:
|
||||
: homref_alleles); // no call: genotype is ref/ref (but alleles still contain the alt if we observed anything at all)
|
||||
genotypes.add(gb.make());
|
||||
|
||||
}
|
||||
Set<String> filters = null;
|
||||
|
|
@ -1238,11 +1237,11 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
GenotypesContext genotypes = GenotypesContext.create();
|
||||
|
||||
for ( String sample : normalSamples ) {
|
||||
genotypes.add(new Genotype(sample, homRefN ? homRefAlleles : alleles,Genotype.NO_LOG10_PERROR,null,attrsNormal,false));
|
||||
genotypes.add(GenotypeBuilder.create(sample, homRefN ? homRefAlleles : alleles, attrsNormal));
|
||||
}
|
||||
|
||||
for ( String sample : tumorSamples ) {
|
||||
genotypes.add(new Genotype(sample, homRefT ? homRefAlleles : alleles,Genotype.NO_LOG10_PERROR,null,attrsTumor,false) );
|
||||
genotypes.add(GenotypeBuilder.create(sample, homRefT ? homRefAlleles : alleles, attrsTumor));
|
||||
}
|
||||
|
||||
Set<String> filters = null;
|
||||
|
|
@ -2144,7 +2143,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
|
||||
|
||||
class VCFIndelAttributes {
|
||||
public static String ALLELIC_DEPTH_KEY = "AD";
|
||||
public static String ALLELIC_DEPTH_KEY = VCFConstants.GENOTYPE_ALLELE_DEPTHS;
|
||||
public static String DEPTH_TOTAL_KEY = VCFConstants.DEPTH_KEY;
|
||||
|
||||
public static String MAPQ_KEY = "MQS";
|
||||
|
|
|
|||
|
|
@ -97,10 +97,10 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
private ArrayList<Sample> trios = new ArrayList<Sample>();
|
||||
|
||||
//Matrix of priors for all genotype combinations
|
||||
private EnumMap<Genotype.Type,EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>> mvCountMatrix;
|
||||
private EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>> mvCountMatrix;
|
||||
|
||||
//Matrix of allele transmission
|
||||
private EnumMap<Genotype.Type,EnumMap<Genotype.Type,EnumMap<Genotype.Type,TrioPhase>>> transmissionMatrix;
|
||||
private EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,TrioPhase>>> transmissionMatrix;
|
||||
|
||||
//Metrics counters hash keys
|
||||
private final Byte NUM_TRIO_GENOTYPES_CALLED = 0;
|
||||
|
|
@ -138,17 +138,17 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
|
||||
private EnumMap<FamilyMember,Genotype> trioPhasedGenotypes = new EnumMap<FamilyMember, Genotype>(FamilyMember.class);
|
||||
|
||||
private ArrayList<Allele> getAlleles(Genotype.Type genotype){
|
||||
private ArrayList<Allele> getAlleles(GenotypeType genotype){
|
||||
ArrayList<Allele> alleles = new ArrayList<Allele>(2);
|
||||
if(genotype == Genotype.Type.HOM_REF){
|
||||
if(genotype == GenotypeType.HOM_REF){
|
||||
alleles.add(REF);
|
||||
alleles.add(REF);
|
||||
}
|
||||
else if(genotype == Genotype.Type.HET){
|
||||
else if(genotype == GenotypeType.HET){
|
||||
alleles.add(REF);
|
||||
alleles.add(VAR);
|
||||
}
|
||||
else if(genotype == Genotype.Type.HOM_VAR){
|
||||
else if(genotype == GenotypeType.HOM_VAR){
|
||||
alleles.add(VAR);
|
||||
alleles.add(VAR);
|
||||
}
|
||||
|
|
@ -158,27 +158,34 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
return alleles;
|
||||
}
|
||||
|
||||
private boolean isPhasable(Genotype.Type genotype){
|
||||
return genotype == Genotype.Type.HOM_REF || genotype == Genotype.Type.HET || genotype == Genotype.Type.HOM_VAR;
|
||||
private boolean isPhasable(GenotypeType genotype){
|
||||
return genotype == GenotypeType.HOM_REF || genotype == GenotypeType.HET || genotype == GenotypeType.HOM_VAR;
|
||||
}
|
||||
|
||||
//Create a new Genotype based on information from a single individual
|
||||
//Homozygous genotypes will be set as phased, heterozygous won't be
|
||||
private void phaseSingleIndividualAlleles(Genotype.Type genotype, FamilyMember familyMember){
|
||||
if(genotype == Genotype.Type.HOM_REF || genotype == Genotype.Type.HOM_VAR){
|
||||
trioPhasedGenotypes.put(familyMember, new Genotype(DUMMY_NAME, getAlleles(genotype), Genotype.NO_LOG10_PERROR, null, null, true));
|
||||
}
|
||||
else
|
||||
trioPhasedGenotypes.put(familyMember, new Genotype(DUMMY_NAME,getAlleles(genotype),Genotype.NO_LOG10_PERROR,null,null,false));
|
||||
private void phaseSingleIndividualAlleles(GenotypeType genotype, FamilyMember familyMember){
|
||||
boolean phase = genotype == GenotypeType.HOM_REF || genotype == GenotypeType.HOM_VAR;
|
||||
trioPhasedGenotypes.put(familyMember, makeGenotype(genotype, phase));
|
||||
}
|
||||
|
||||
/**
 * Builds a genotype for the given genotype type.
 * Looks up the allele list for the type with getAlleles(type) and delegates
 * to the allele-list overload of makeGenotype.
 *
 * @param type  genotype type that is converted to alleles via getAlleles
 * @param phase value forwarded unchanged as the phased flag
 * @return the genotype returned by the allele-list overload
 */
private Genotype makeGenotype(final GenotypeType type, boolean phase) {
|
||||
return makeGenotype(getAlleles(type), phase);
|
||||
}
|
||||
|
||||
/**
 * Builds a genotype from an explicit allele list.
 * Creates a GenotypeBuilder with DUMMY_NAME as its first argument and the
 * supplied alleles as its second, sets the phased flag, and returns the
 * result of make().
 *
 * @param alleles alleles passed to the GenotypeBuilder constructor
 * @param phase   value passed to GenotypeBuilder.phased
 * @return the genotype produced by the builder
 */
private Genotype makeGenotype(final List<Allele> alleles, boolean phase) {
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(DUMMY_NAME, alleles);
|
||||
gb.phased(phase);
|
||||
return gb.make();
|
||||
}
|
||||
|
||||
//Find the phase for a parent/child pair
|
||||
private void phasePairAlleles(Genotype.Type parentGenotype, Genotype.Type childGenotype, FamilyMember parent){
|
||||
private void phasePairAlleles(GenotypeType parentGenotype, GenotypeType childGenotype, FamilyMember parent){
|
||||
|
||||
//Special case for Het/Het as it is ambiguous
|
||||
if(parentGenotype == Genotype.Type.HET && childGenotype == Genotype.Type.HET){
|
||||
trioPhasedGenotypes.put(parent, new Genotype(DUMMY_NAME, getAlleles(parentGenotype), Genotype.NO_LOG10_PERROR, null, null, false));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME,getAlleles(childGenotype),Genotype.NO_LOG10_PERROR,null,null,false));
|
||||
if(parentGenotype == GenotypeType.HET && childGenotype == GenotypeType.HET){
|
||||
trioPhasedGenotypes.put(parent, makeGenotype(parentGenotype, false));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, makeGenotype(childGenotype, false));
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -190,34 +197,34 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
//If there is a possible phasing between the parent and child => phase
|
||||
int childTransmittedAlleleIndex = childAlleles.indexOf(parentAlleles.get(0));
|
||||
if(childTransmittedAlleleIndex > -1){
|
||||
trioPhasedGenotypes.put(parent, new Genotype(DUMMY_NAME, parentAlleles, Genotype.NO_LOG10_PERROR, null, null, true));
|
||||
trioPhasedGenotypes.put(parent, makeGenotype(parentAlleles, true));
|
||||
childPhasedAlleles.add(childAlleles.remove(childTransmittedAlleleIndex));
|
||||
if(parent.equals(FamilyMember.MOTHER))
|
||||
childPhasedAlleles.add(childAlleles.get(0));
|
||||
else
|
||||
childPhasedAlleles.add(0,childAlleles.get(0));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME, childPhasedAlleles, Genotype.NO_LOG10_PERROR, null, null, true));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, makeGenotype(childPhasedAlleles, true));
|
||||
}
|
||||
else if((childTransmittedAlleleIndex = childAlleles.indexOf(parentAlleles.get(1))) > -1){
|
||||
parentPhasedAlleles.add(parentAlleles.get(1));
|
||||
parentPhasedAlleles.add(parentAlleles.get(0));
|
||||
trioPhasedGenotypes.put(parent, new Genotype(DUMMY_NAME, parentPhasedAlleles, Genotype.NO_LOG10_PERROR, null, null, true));
|
||||
trioPhasedGenotypes.put(parent, makeGenotype(parentPhasedAlleles, true));
|
||||
childPhasedAlleles.add(childAlleles.remove(childTransmittedAlleleIndex));
|
||||
if(parent.equals(FamilyMember.MOTHER))
|
||||
childPhasedAlleles.add(childAlleles.get(0));
|
||||
else
|
||||
childPhasedAlleles.add(0,childAlleles.get(0));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME, childPhasedAlleles, Genotype.NO_LOG10_PERROR, null, null, true));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, makeGenotype(childPhasedAlleles, true));
|
||||
}
|
||||
//This is a Mendelian Violation => Do not phase
|
||||
else{
|
||||
trioPhasedGenotypes.put(parent, new Genotype(DUMMY_NAME,getAlleles(parentGenotype),Genotype.NO_LOG10_PERROR,null,null,false));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME,getAlleles(childGenotype),Genotype.NO_LOG10_PERROR,null,null,false));
|
||||
trioPhasedGenotypes.put(parent, makeGenotype(parentGenotype, false));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, makeGenotype(childGenotype, false));
|
||||
}
|
||||
}
|
||||
|
||||
//Phases a family by transmission
|
||||
private void phaseFamilyAlleles(Genotype.Type mother, Genotype.Type father, Genotype.Type child){
|
||||
private void phaseFamilyAlleles(GenotypeType mother, GenotypeType father, GenotypeType child){
|
||||
|
||||
Set<ArrayList<Allele>> possiblePhasedChildGenotypes = new HashSet<ArrayList<Allele>>();
|
||||
ArrayList<Allele> motherAlleles = getAlleles(mother);
|
||||
|
|
@ -246,7 +253,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
motherPhasedAlleles.add(motherAlleles.get(0));
|
||||
else
|
||||
motherPhasedAlleles.add(motherAlleles.get(1));
|
||||
trioPhasedGenotypes.put(FamilyMember.MOTHER, new Genotype(DUMMY_NAME,motherPhasedAlleles,Genotype.NO_LOG10_PERROR,null,null,true));
|
||||
trioPhasedGenotypes.put(FamilyMember.MOTHER, makeGenotype(motherPhasedAlleles, true));
|
||||
|
||||
//Create father's genotype
|
||||
ArrayList<Allele> fatherPhasedAlleles = new ArrayList<Allele>(2);
|
||||
|
|
@ -255,10 +262,10 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
fatherPhasedAlleles.add(fatherAlleles.get(0));
|
||||
else
|
||||
fatherPhasedAlleles.add(fatherAlleles.get(1));
|
||||
trioPhasedGenotypes.put(FamilyMember.FATHER, new Genotype(DUMMY_NAME,fatherPhasedAlleles,Genotype.NO_LOG10_PERROR,null,null,true));
|
||||
trioPhasedGenotypes.put(FamilyMember.FATHER, makeGenotype(fatherPhasedAlleles,true));
|
||||
|
||||
//Create child's genotype
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME,childPhasedAllelesAlleles,Genotype.NO_LOG10_PERROR,null,null,true));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, makeGenotype(childPhasedAllelesAlleles,true));
|
||||
|
||||
//Once a phased combination is found; exit
|
||||
return;
|
||||
|
|
@ -266,16 +273,16 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
}
|
||||
|
||||
//If this is reached then no phasing could be found
|
||||
trioPhasedGenotypes.put(FamilyMember.MOTHER, new Genotype(DUMMY_NAME,getAlleles(mother),Genotype.NO_LOG10_PERROR,null,null,false));
|
||||
trioPhasedGenotypes.put(FamilyMember.FATHER, new Genotype(DUMMY_NAME,getAlleles(father),Genotype.NO_LOG10_PERROR,null,null,false));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, new Genotype(DUMMY_NAME,getAlleles(child),Genotype.NO_LOG10_PERROR,null,null,false));
|
||||
trioPhasedGenotypes.put(FamilyMember.MOTHER, makeGenotype(mother,false));
|
||||
trioPhasedGenotypes.put(FamilyMember.FATHER, makeGenotype(father,false));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD, makeGenotype(child,false));
|
||||
}
|
||||
|
||||
/* Constructor: Creates a conceptual trio genotype combination from the given genotypes.
|
||||
If one or more genotypes are set as NO_CALL or UNAVAILABLE, it will phase them like a pair
|
||||
or single individual.
|
||||
*/
|
||||
public TrioPhase(Genotype.Type mother, Genotype.Type father, Genotype.Type child){
|
||||
public TrioPhase(GenotypeType mother, GenotypeType father, GenotypeType child){
|
||||
|
||||
//Take care of cases where one or more family members are no call
|
||||
if(!isPhasable(child)){
|
||||
|
|
@ -297,7 +304,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
phaseSingleIndividualAlleles(father, FamilyMember.FATHER);
|
||||
}
|
||||
//Special case for Het/Het/Het as it is ambiguous
|
||||
else if(mother == Genotype.Type.HET && father == Genotype.Type.HET && child == Genotype.Type.HET){
|
||||
else if(mother == GenotypeType.HET && father == GenotypeType.HET && child == GenotypeType.HET){
|
||||
phaseSingleIndividualAlleles(mother, FamilyMember.MOTHER);
|
||||
phaseSingleIndividualAlleles(father, FamilyMember.FATHER);
|
||||
phaseSingleIndividualAlleles(child, FamilyMember.CHILD);
|
||||
|
|
@ -311,7 +318,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
if(fatherFAlleleFirst && trioPhasedGenotypes.get(FamilyMember.CHILD).isPhased()){
|
||||
ArrayList<Allele> childAlleles = new ArrayList<Allele>(trioPhasedGenotypes.get(FamilyMember.CHILD).getAlleles());
|
||||
childAlleles.add(childAlleles.remove(0));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD,new Genotype(DUMMY_NAME,childAlleles,Genotype.NO_LOG10_PERROR,null,null,true));
|
||||
trioPhasedGenotypes.put(FamilyMember.CHILD,makeGenotype(childAlleles,true));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -347,7 +354,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
|
||||
//Add the transmission probability
|
||||
Map<String, Object> genotypeAttributes = new HashMap<String, Object>();
|
||||
genotypeAttributes.putAll(genotype.getAttributes());
|
||||
genotypeAttributes.putAll(genotype.getExtendedAttributes());
|
||||
if(transmissionProb>NO_TRANSMISSION_PROB)
|
||||
genotypeAttributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, phredScoreTransmission);
|
||||
|
||||
|
|
@ -370,7 +377,10 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
else
|
||||
log10Error = genotype.getLikelihoods().getLog10GQ(phasedGenotype.getType());
|
||||
|
||||
return new Genotype(genotype.getSampleName(), phasedAlleles, log10Error, null, genotypeAttributes, phasedGenotype.isPhased());
|
||||
return new GenotypeBuilder(genotype).alleles(phasedAlleles)
|
||||
.log10PError(log10Error)
|
||||
.attributes(genotypeAttributes)
|
||||
.phased(phasedGenotype.isPhased()).make();
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -438,15 +448,15 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
|
||||
//Create the transmission matrices
|
||||
private void buildMatrices(){
|
||||
mvCountMatrix = new EnumMap<Genotype.Type,EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>>(Genotype.Type.class);
|
||||
transmissionMatrix = new EnumMap<Genotype.Type,EnumMap<Genotype.Type,EnumMap<Genotype.Type,TrioPhase>>>(Genotype.Type.class);
|
||||
for(Genotype.Type mother : Genotype.Type.values()){
|
||||
mvCountMatrix.put(mother,new EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>(Genotype.Type.class));
|
||||
transmissionMatrix.put(mother,new EnumMap<Genotype.Type,EnumMap<Genotype.Type,TrioPhase>>(Genotype.Type.class));
|
||||
for(Genotype.Type father : Genotype.Type.values()){
|
||||
mvCountMatrix.get(mother).put(father,new EnumMap<Genotype.Type, Integer>(Genotype.Type.class));
|
||||
transmissionMatrix.get(mother).put(father,new EnumMap<Genotype.Type,TrioPhase>(Genotype.Type.class));
|
||||
for(Genotype.Type child : Genotype.Type.values()){
|
||||
mvCountMatrix = new EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>>(GenotypeType.class);
|
||||
transmissionMatrix = new EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,TrioPhase>>>(GenotypeType.class);
|
||||
for(GenotypeType mother : GenotypeType.values()){
|
||||
mvCountMatrix.put(mother,new EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>(GenotypeType.class));
|
||||
transmissionMatrix.put(mother,new EnumMap<GenotypeType,EnumMap<GenotypeType,TrioPhase>>(GenotypeType.class));
|
||||
for(GenotypeType father : GenotypeType.values()){
|
||||
mvCountMatrix.get(mother).put(father,new EnumMap<GenotypeType, Integer>(GenotypeType.class));
|
||||
transmissionMatrix.get(mother).put(father,new EnumMap<GenotypeType,TrioPhase>(GenotypeType.class));
|
||||
for(GenotypeType child : GenotypeType.values()){
|
||||
mvCountMatrix.get(mother).get(father).put(child, getCombinationMVCount(mother, father, child));
|
||||
transmissionMatrix.get(mother).get(father).put(child,new TrioPhase(mother,father,child));
|
||||
}
|
||||
|
|
@ -457,16 +467,16 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
//Returns the number of Mendelian Violations for a given genotype combination.
|
||||
//If one parent's genotype is missing, the family is treated as a parent/child pair
|
||||
//If the child's genotype or both parents' genotypes are missing, 0 is returned.
|
||||
private int getCombinationMVCount(Genotype.Type mother, Genotype.Type father, Genotype.Type child){
|
||||
private int getCombinationMVCount(GenotypeType mother, GenotypeType father, GenotypeType child){
|
||||
|
||||
//Child is no call => No MV
|
||||
if(child == Genotype.Type.NO_CALL || child == Genotype.Type.UNAVAILABLE)
|
||||
if(child == GenotypeType.NO_CALL || child == GenotypeType.UNAVAILABLE)
|
||||
return 0;
|
||||
//Add parents with genotypes for the evaluation
|
||||
ArrayList<Genotype.Type> parents = new ArrayList<Genotype.Type>();
|
||||
if (!(mother == Genotype.Type.NO_CALL || mother == Genotype.Type.UNAVAILABLE))
|
||||
ArrayList<GenotypeType> parents = new ArrayList<GenotypeType>();
|
||||
if (!(mother == GenotypeType.NO_CALL || mother == GenotypeType.UNAVAILABLE))
|
||||
parents.add(mother);
|
||||
if (!(father == Genotype.Type.NO_CALL || father == Genotype.Type.UNAVAILABLE))
|
||||
if (!(father == GenotypeType.NO_CALL || father == GenotypeType.UNAVAILABLE))
|
||||
parents.add(father);
|
||||
|
||||
//Both parents no calls => No MV
|
||||
|
|
@ -477,35 +487,35 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
int parentsNumRefAlleles = 0;
|
||||
int parentsNumAltAlleles = 0;
|
||||
|
||||
for(Genotype.Type parent : parents){
|
||||
if(parent == Genotype.Type.HOM_REF){
|
||||
for(GenotypeType parent : parents){
|
||||
if(parent == GenotypeType.HOM_REF){
|
||||
parentsNumRefAlleles++;
|
||||
}
|
||||
else if(parent == Genotype.Type.HET){
|
||||
else if(parent == GenotypeType.HET){
|
||||
parentsNumRefAlleles++;
|
||||
parentsNumAltAlleles++;
|
||||
}
|
||||
else if(parent == Genotype.Type.HOM_VAR){
|
||||
else if(parent == GenotypeType.HOM_VAR){
|
||||
parentsNumAltAlleles++;
|
||||
}
|
||||
}
|
||||
|
||||
//Case Child is HomRef
|
||||
if(child == Genotype.Type.HOM_REF){
|
||||
if(child == GenotypeType.HOM_REF){
|
||||
if(parentsNumRefAlleles == parents.size())
|
||||
return 0;
|
||||
else return (parents.size()-parentsNumRefAlleles);
|
||||
}
|
||||
|
||||
//Case child is HomVar
|
||||
if(child == Genotype.Type.HOM_VAR){
|
||||
if(child == GenotypeType.HOM_VAR){
|
||||
if(parentsNumAltAlleles == parents.size())
|
||||
return 0;
|
||||
else return parents.size()-parentsNumAltAlleles;
|
||||
}
|
||||
|
||||
//Case child is Het
|
||||
if(child == Genotype.Type.HET && ((parentsNumRefAlleles > 0 && parentsNumAltAlleles > 0) || parents.size()<2))
|
||||
if(child == GenotypeType.HET && ((parentsNumRefAlleles > 0 && parentsNumAltAlleles > 0) || parents.size()<2))
|
||||
return 0;
|
||||
|
||||
//MV
|
||||
|
|
@ -513,7 +523,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
}
|
||||
|
||||
//Given two trio genotypes combinations, returns the number of different genotypes between the two combinations.
|
||||
private int countFamilyGenotypeDiff(Genotype.Type motherOriginal,Genotype.Type fatherOriginal,Genotype.Type childOriginal,Genotype.Type motherNew,Genotype.Type fatherNew,Genotype.Type childNew){
|
||||
private int countFamilyGenotypeDiff(GenotypeType motherOriginal,GenotypeType fatherOriginal,GenotypeType childOriginal,GenotypeType motherNew,GenotypeType fatherNew,GenotypeType childNew){
|
||||
int count = 0;
|
||||
if(motherOriginal!=motherNew)
|
||||
count++;
|
||||
|
|
@ -526,21 +536,21 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
|
||||
//Get a Map of genotype likelihoods.
|
||||
//In case of null, unavailable or no call, all likelihoods are 1/3.
|
||||
private EnumMap<Genotype.Type,Double> getLikelihoodsAsMapSafeNull(Genotype genotype){
|
||||
private EnumMap<GenotypeType,Double> getLikelihoodsAsMapSafeNull(Genotype genotype){
|
||||
if(genotype == null || !genotype.isCalled()){
|
||||
EnumMap<Genotype.Type,Double> likelihoods = new EnumMap<Genotype.Type, Double>(Genotype.Type.class);
|
||||
likelihoods.put(Genotype.Type.HOM_REF,1.0/3.0);
|
||||
likelihoods.put(Genotype.Type.HET,1.0/3.0);
|
||||
likelihoods.put(Genotype.Type.HOM_VAR,1.0/3.0);
|
||||
EnumMap<GenotypeType,Double> likelihoods = new EnumMap<GenotypeType, Double>(GenotypeType.class);
|
||||
likelihoods.put(GenotypeType.HOM_REF,1.0/3.0);
|
||||
likelihoods.put(GenotypeType.HET,1.0/3.0);
|
||||
likelihoods.put(GenotypeType.HOM_VAR,1.0/3.0);
|
||||
return likelihoods;
|
||||
}
|
||||
return genotype.getLikelihoods().getAsMap(true);
|
||||
}
|
||||
|
||||
//Returns the Genotype.Type; returns UNAVAILABLE if given null
|
||||
private Genotype.Type getTypeSafeNull(Genotype genotype){
|
||||
//Returns the GenotypeType; returns UNAVAILABLE if given null
|
||||
private GenotypeType getTypeSafeNull(Genotype genotype){
|
||||
if(genotype == null)
|
||||
return Genotype.Type.UNAVAILABLE;
|
||||
return GenotypeType.UNAVAILABLE;
|
||||
return genotype.getType();
|
||||
}
|
||||
|
||||
|
|
@ -561,18 +571,18 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
//Always assign the first parent as the parent having genotype information in pairs
|
||||
//Always assign the mother as the first parent in trios
|
||||
int parentsCalled = 0;
|
||||
Map<Genotype.Type,Double> firstParentLikelihoods;
|
||||
Map<Genotype.Type,Double> secondParentLikelihoods;
|
||||
ArrayList<Genotype.Type> bestFirstParentGenotype = new ArrayList<Genotype.Type>();
|
||||
ArrayList<Genotype.Type> bestSecondParentGenotype = new ArrayList<Genotype.Type>();
|
||||
ArrayList<Genotype.Type> bestChildGenotype = new ArrayList<Genotype.Type>();
|
||||
Genotype.Type pairSecondParentGenotype = null;
|
||||
Map<GenotypeType,Double> firstParentLikelihoods;
|
||||
Map<GenotypeType,Double> secondParentLikelihoods;
|
||||
ArrayList<GenotypeType> bestFirstParentGenotype = new ArrayList<GenotypeType>();
|
||||
ArrayList<GenotypeType> bestSecondParentGenotype = new ArrayList<GenotypeType>();
|
||||
ArrayList<GenotypeType> bestChildGenotype = new ArrayList<GenotypeType>();
|
||||
GenotypeType pairSecondParentGenotype = null;
|
||||
if(mother == null || !mother.isCalled()){
|
||||
firstParentLikelihoods = getLikelihoodsAsMapSafeNull(father);
|
||||
secondParentLikelihoods = getLikelihoodsAsMapSafeNull(mother);
|
||||
bestFirstParentGenotype.add(getTypeSafeNull(father));
|
||||
bestSecondParentGenotype.add(getTypeSafeNull(mother));
|
||||
pairSecondParentGenotype = mother == null ? Genotype.Type.UNAVAILABLE : mother.getType();
|
||||
pairSecondParentGenotype = mother == null ? GenotypeType.UNAVAILABLE : mother.getType();
|
||||
if(father != null && father.isCalled())
|
||||
parentsCalled = 1;
|
||||
}
|
||||
|
|
@ -583,12 +593,12 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
bestSecondParentGenotype.add(getTypeSafeNull(father));
|
||||
if(father == null || !father.isCalled()){
|
||||
parentsCalled = 1;
|
||||
pairSecondParentGenotype = father == null ? Genotype.Type.UNAVAILABLE : father.getType();
|
||||
pairSecondParentGenotype = father == null ? GenotypeType.UNAVAILABLE : father.getType();
|
||||
}else{
|
||||
parentsCalled = 2;
|
||||
}
|
||||
}
|
||||
Map<Genotype.Type,Double> childLikelihoods = getLikelihoodsAsMapSafeNull(child);
|
||||
Map<GenotypeType,Double> childLikelihoods = getLikelihoodsAsMapSafeNull(child);
|
||||
bestChildGenotype.add(getTypeSafeNull(child));
|
||||
|
||||
//Prior vars
|
||||
|
|
@ -604,9 +614,9 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
int mvCount;
|
||||
int cumulativeMVCount = 0;
|
||||
double configurationLikelihood = 0;
|
||||
for(Map.Entry<Genotype.Type,Double> childGenotype : childLikelihoods.entrySet()){
|
||||
for(Map.Entry<Genotype.Type,Double> firstParentGenotype : firstParentLikelihoods.entrySet()){
|
||||
for(Map.Entry<Genotype.Type,Double> secondParentGenotype : secondParentLikelihoods.entrySet()){
|
||||
for(Map.Entry<GenotypeType,Double> childGenotype : childLikelihoods.entrySet()){
|
||||
for(Map.Entry<GenotypeType,Double> firstParentGenotype : firstParentLikelihoods.entrySet()){
|
||||
for(Map.Entry<GenotypeType,Double> secondParentGenotype : secondParentLikelihoods.entrySet()){
|
||||
mvCount = mvCountMatrix.get(firstParentGenotype.getKey()).get(secondParentGenotype.getKey()).get(childGenotype.getKey());
|
||||
//For parent/child pairs, sum over the possible genotype configurations of the missing parent
|
||||
if(parentsCalled<2){
|
||||
|
|
@ -797,9 +807,9 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
updateTrioMetricsCounters(phasedMother,phasedFather,phasedChild,mvCount,metricsCounters);
|
||||
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s:%s:%s:%s\t%s:%s:%s:%s\t%s:%s:%s:%s",
|
||||
vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(),
|
||||
phasedMother.getAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getAttribute(VCFConstants.DEPTH_KEY),phasedMother.getAttribute("AD"),phasedMother.getLikelihoodsString(),
|
||||
phasedFather.getGenotypeString(),phasedFather.getAttribute(VCFConstants.DEPTH_KEY),phasedFather.getAttribute("AD"),phasedFather.getLikelihoodsString(),
|
||||
phasedChild.getGenotypeString(),phasedChild.getAttribute(VCFConstants.DEPTH_KEY),phasedChild.getAttribute("AD"),phasedChild.getLikelihoodsString());
|
||||
phasedMother.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),Arrays.asList(phasedMother.getAD()),
|
||||
phasedMother.getLikelihoodsString(), phasedFather.getGenotypeString(),phasedFather.getDP(),Arrays.asList(phasedFather.getAD()),phasedFather.getLikelihoodsString(),
|
||||
// child fields follow the same genotype, DP, AD, likelihoods order as the mother and father fields above
phasedChild.getGenotypeString(),phasedChild.getDP(),Arrays.asList(phasedChild.getAD()),phasedChild.getLikelihoodsString());
|
||||
if(!(phasedMother.getType()==mother.getType() && phasedFather.getType()==father.getType() && phasedChild.getType()==child.getType()))
|
||||
metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1);
|
||||
}
|
||||
|
|
@ -809,8 +819,8 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1);
|
||||
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s:%s:%s:%s\t.:.:.:.\t%s:%s:%s:%s",
|
||||
vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(),
|
||||
phasedMother.getAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getAttribute(VCFConstants.DEPTH_KEY),phasedMother.getAttribute("AD"),phasedMother.getLikelihoodsString(),
|
||||
phasedChild.getGenotypeString(),phasedChild.getAttribute(VCFConstants.DEPTH_KEY),phasedChild.getAttribute("AD"),phasedChild.getLikelihoodsString());
|
||||
phasedMother.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),Arrays.asList(phasedMother.getAD()),phasedMother.getLikelihoodsString(),
|
||||
phasedChild.getGenotypeString(),phasedChild.getDP(),Arrays.asList(phasedChild.getAD()),phasedChild.getLikelihoodsString());
|
||||
}
|
||||
}
|
||||
else{
|
||||
|
|
@ -820,8 +830,8 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1);
|
||||
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t.:.:.:.\t%s:%s:%s:%s\t%s:%s:%s:%s",
|
||||
vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(),
|
||||
phasedFather.getAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedFather.getGenotypeString(),phasedFather.getAttribute(VCFConstants.DEPTH_KEY),phasedFather.getAttribute("AD"),phasedFather.getLikelihoodsString(),
|
||||
phasedChild.getGenotypeString(),phasedChild.getAttribute(VCFConstants.DEPTH_KEY),phasedChild.getAttribute("AD"),phasedChild.getLikelihoodsString());
|
||||
phasedFather.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedFather.getGenotypeString(),phasedFather.getDP(),Arrays.asList(phasedFather.getAD()),phasedFather.getLikelihoodsString(),
|
||||
phasedChild.getGenotypeString(),phasedChild.getDP(),Arrays.asList(phasedChild.getAD()),phasedChild.getLikelihoodsString());
|
||||
}
|
||||
|
||||
//Report violation if set so
|
||||
|
|
|
|||
|
|
@ -109,14 +109,13 @@ class PhasingUtils {
|
|||
}
|
||||
|
||||
double mergedGQ = Math.max(gt1.getLog10PError(), gt2.getLog10PError());
|
||||
Set<String> mergedGtFilters = new HashSet<String>(); // Since gt1 and gt2 were unfiltered, the Genotype remains unfiltered
|
||||
|
||||
Map<String, Object> mergedGtAttribs = new HashMap<String, Object>();
|
||||
PhaseAndQuality phaseQual = calcPhaseForMergedGenotypes(gt1, gt2);
|
||||
if (phaseQual.PQ != null)
|
||||
mergedGtAttribs.put(ReadBackedPhasingWalker.PQ_KEY, phaseQual.PQ);
|
||||
|
||||
Genotype mergedGt = new Genotype(gt1.getSampleName(), mergedAllelesForSample, mergedGQ, mergedGtFilters, mergedGtAttribs, phaseQual.isPhased);
|
||||
Genotype mergedGt = new GenotypeBuilder(gt1.getSampleName(), mergedAllelesForSample).log10PError(mergedGQ).attributes(mergedGtAttribs).phased(phaseQual.isPhased).make();
|
||||
mergedGenotypes.add(mergedGt);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -288,7 +288,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
private VariantContext reduceVCToSamples(VariantContext vc, Set<String> samplesToPhase) {
|
||||
// for ( String sample : samplesToPhase )
|
||||
// logger.debug(String.format(" Sample %s has genotype %s, het = %s", sample, vc.getGenotype(sample), vc.getGenotype(sample).isHet() ));
|
||||
VariantContext subvc = vc.subContextFromSamples(samplesToPhase);
|
||||
VariantContext subvc = vc.subContextFromSamples(samplesToPhase, true);
|
||||
// logger.debug("original VC = " + vc);
|
||||
// logger.debug("sub VC = " + subvc);
|
||||
return VariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF);
|
||||
|
|
@ -374,7 +374,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
if (isUnfilteredCalledDiploidGenotype(gt)) {
|
||||
if (gt.isHom()) { // Note that this Genotype may be replaced later to contain the PQ of a downstream het site that was phased relative to a het site lying upstream of this hom site:
|
||||
// true <-> can trivially phase a hom site relative to ANY previous site:
|
||||
Genotype phasedGt = new Genotype(gt.getSampleName(), gt.getAlleles(), gt.getLog10PError(), gt.getFilters(), gt.getAttributes(), true);
|
||||
Genotype phasedGt = new GenotypeBuilder(gt).phased(true).make();
|
||||
uvc.setGenotype(samp, phasedGt);
|
||||
}
|
||||
else if (gt.isHet()) { // Attempt to phase this het genotype relative to the previous het genotype
|
||||
|
|
@ -408,9 +408,10 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
if (DEBUG) logger.debug("THE PHASE CHOSEN HERE:\n" + allelePair + "\n\n");
|
||||
|
||||
ensurePhasing(allelePair, prevAllelePair, pr.haplotype);
|
||||
Map<String, Object> gtAttribs = new HashMap<String, Object>(gt.getAttributes());
|
||||
gtAttribs.put(PQ_KEY, pr.phaseQuality);
|
||||
Genotype phasedGt = new Genotype(gt.getSampleName(), allelePair.getAllelesAsList(), gt.getLog10PError(), gt.getFilters(), gtAttribs, genotypesArePhased);
|
||||
Genotype phasedGt = new GenotypeBuilder(gt)
|
||||
.alleles(allelePair.getAllelesAsList())
|
||||
.attribute(PQ_KEY, pr.phaseQuality)
|
||||
.phased(genotypesArePhased).make();
|
||||
uvc.setGenotype(samp, phasedGt);
|
||||
}
|
||||
|
||||
|
|
@ -428,9 +429,9 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
interiorUvc.setPhasingInconsistent();
|
||||
|
||||
if (genotypesArePhased) {
|
||||
Map<String, Object> handledGtAttribs = new HashMap<String, Object>(handledGt.getAttributes());
|
||||
handledGtAttribs.put(PQ_KEY, pr.phaseQuality);
|
||||
Genotype phasedHomGt = new Genotype(handledGt.getSampleName(), handledGt.getAlleles(), handledGt.getLog10PError(), handledGt.getFilters(), handledGtAttribs, genotypesArePhased);
|
||||
Genotype phasedHomGt = new GenotypeBuilder(handledGt)
|
||||
.attribute(PQ_KEY, pr.phaseQuality)
|
||||
.phased(genotypesArePhased).make();
|
||||
interiorUvc.setGenotype(samp, phasedHomGt);
|
||||
}
|
||||
}
|
||||
|
|
@ -1439,7 +1440,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
}
|
||||
|
||||
public static boolean isUnfilteredCalledDiploidGenotype(Genotype gt) {
|
||||
return (gt.isNotFiltered() && gt.isCalled() && gt.getPloidy() == 2);
|
||||
return (! gt.isFiltered() && gt.isCalled() && gt.getPloidy() == 2);
|
||||
}
|
||||
|
||||
private class MultipleBaseCountsWriter {
|
||||
|
|
|
|||
|
|
@ -423,7 +423,7 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
|
|||
}
|
||||
}
|
||||
else {
|
||||
// if (!vcComp.hasAttribute("GV"))
|
||||
// if (!vcComp.hasExtendedAttribute("GV"))
|
||||
// throw new UserException.BadInput("Variant has no GV annotation in the INFO field. " + vcComp.getChr() + ":" + vcComp.getStart());
|
||||
|
||||
if (call.isCalledAlt(callConf)) {
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ public class GLBasedSampleSelector extends SampleSelector {
|
|||
return true;
|
||||
// want to include a site in the given samples if it is *likely* to be variant (via the EXACT model)
|
||||
// first subset to the samples
|
||||
VariantContext subContext = vc.subContextFromSamples(samples);
|
||||
VariantContext subContext = vc.subContextFromSamples(samples, true);
|
||||
|
||||
// now check to see (using EXACT model) whether this should be variant
|
||||
// do we want to apply a prior? maybe user-spec?
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ public class GTBasedSampleSelector extends SampleSelector{
|
|||
if ( samples == null || samples.isEmpty() )
|
||||
return true;
|
||||
|
||||
VariantContext subContext = vc.subContextFromSamples(samples, vc.getAlleles());
|
||||
VariantContext subContext = vc.subContextFromSamples(samples, false);
|
||||
if ( subContext.isPolymorphicInSamples() ) {
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
|||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Molten;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeType;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -54,7 +55,7 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
* Initialize this object
|
||||
*/
|
||||
public GenotypeConcordance() {
|
||||
final int nGenotypeTypes = Genotype.Type.values().length;
|
||||
final int nGenotypeTypes = GenotypeType.values().length;
|
||||
truthByCalledGenotypeCounts = new long[nGenotypeTypes][nGenotypeTypes];
|
||||
}
|
||||
|
||||
|
|
@ -75,11 +76,11 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
if (eval != null) {
|
||||
for (final Genotype g : eval.getGenotypes() ) {
|
||||
final String sample = g.getSampleName();
|
||||
final Genotype.Type called = g.getType();
|
||||
final Genotype.Type truth;
|
||||
final GenotypeType called = g.getType();
|
||||
final GenotypeType truth;
|
||||
|
||||
if (!validationIsValidVC || !validation.hasGenotype(sample)) {
|
||||
truth = Genotype.Type.NO_CALL;
|
||||
truth = GenotypeType.NO_CALL;
|
||||
} else {
|
||||
truth = validation.getGenotype(sample).getType();
|
||||
}
|
||||
|
|
@ -90,19 +91,19 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
|
||||
// otherwise, mark no-calls for all samples
|
||||
else {
|
||||
final Genotype.Type called = Genotype.Type.NO_CALL;
|
||||
final GenotypeType called = GenotypeType.NO_CALL;
|
||||
|
||||
for (final Genotype g : validation.getGenotypes()) {
|
||||
final Genotype.Type truth = g.getType();
|
||||
final GenotypeType truth = g.getType();
|
||||
incrValue(truth, called);
|
||||
|
||||
// print out interesting sites
|
||||
/*
|
||||
if ( PRINT_INTERESTING_SITES && super.getVEWalker().gcLog != null ) {
|
||||
if ( (truth == Genotype.Type.HOM_VAR || truth == Genotype.Type.HET) && called == Genotype.Type.NO_CALL ) {
|
||||
if ( (truth == GenotypeType.HOM_VAR || truth == GenotypeType.HET) && called == GenotypeType.NO_CALL ) {
|
||||
super.getVEWalker().gcLog.printf("%s FN %s%n", group, validation);
|
||||
}
|
||||
if ( (called == Genotype.Type.HOM_VAR || called == Genotype.Type.HET) && truth == Genotype.Type.HOM_REF ) {
|
||||
if ( (called == GenotypeType.HOM_VAR || called == GenotypeType.HET) && truth == GenotypeType.HOM_REF ) {
|
||||
super.getVEWalker().gcLog.printf("%s FP %s%n", group, validation);
|
||||
}
|
||||
}
|
||||
|
|
@ -121,36 +122,36 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
* @param truth the truth type
|
||||
* @param called the called type
|
||||
*/
|
||||
private void incrValue(final Genotype.Type truth, final Genotype.Type called) {
|
||||
private void incrValue(final GenotypeType truth, final GenotypeType called) {
|
||||
truthByCalledGenotypeCounts[truth.ordinal()][called.ordinal()]++;
|
||||
}
|
||||
|
||||
private long count(final Genotype.Type truth, final Genotype.Type called) {
|
||||
private long count(final GenotypeType truth, final GenotypeType called) {
|
||||
return truthByCalledGenotypeCounts[truth.ordinal()][called.ordinal()];
|
||||
}
|
||||
|
||||
private long count(final EnumSet<Genotype.Type> truth, final Genotype.Type called) {
|
||||
private long count(final EnumSet<GenotypeType> truth, final GenotypeType called) {
|
||||
return count(truth, EnumSet.of(called));
|
||||
}
|
||||
|
||||
private long count(final Genotype.Type truth, final EnumSet<Genotype.Type> called) {
|
||||
private long count(final GenotypeType truth, final EnumSet<GenotypeType> called) {
|
||||
return count(EnumSet.of(truth), called);
|
||||
}
|
||||
|
||||
private long count(final EnumSet<Genotype.Type> truth, final EnumSet<Genotype.Type> called) {
|
||||
private long count(final EnumSet<GenotypeType> truth, final EnumSet<GenotypeType> called) {
|
||||
long sum = 0;
|
||||
for ( final Genotype.Type truth1 : truth ) {
|
||||
for ( final Genotype.Type called1 : called ) {
|
||||
for ( final GenotypeType truth1 : truth ) {
|
||||
for ( final GenotypeType called1 : called ) {
|
||||
sum += count(truth1, called1);
|
||||
}
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
private long countDiag( final EnumSet<Genotype.Type> d1 ) {
|
||||
private long countDiag( final EnumSet<GenotypeType> d1 ) {
|
||||
long sum = 0;
|
||||
|
||||
for(final Genotype.Type e1 : d1 ) {
|
||||
for(final GenotypeType e1 : d1 ) {
|
||||
sum += truthByCalledGenotypeCounts[e1.ordinal()][e1.ordinal()];
|
||||
}
|
||||
|
||||
|
|
@ -159,13 +160,13 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
|
||||
@Override
|
||||
public void finalizeEvaluation() {
|
||||
final EnumSet<Genotype.Type> allVariantGenotypes = EnumSet.of(Genotype.Type.HOM_VAR, Genotype.Type.HET);
|
||||
final EnumSet<Genotype.Type> allCalledGenotypes = EnumSet.of(Genotype.Type.HOM_VAR, Genotype.Type.HET, Genotype.Type.HOM_REF);
|
||||
final EnumSet<Genotype.Type> allGenotypes = EnumSet.allOf(Genotype.Type.class);
|
||||
final EnumSet<GenotypeType> allVariantGenotypes = EnumSet.of(GenotypeType.HOM_VAR, GenotypeType.HET);
|
||||
final EnumSet<GenotypeType> allCalledGenotypes = EnumSet.of(GenotypeType.HOM_VAR, GenotypeType.HET, GenotypeType.HOM_REF);
|
||||
final EnumSet<GenotypeType> allGenotypes = EnumSet.allOf(GenotypeType.class);
|
||||
|
||||
// exact values of the table
|
||||
for ( final Genotype.Type truth : Genotype.Type.values() ) {
|
||||
for ( final Genotype.Type called : Genotype.Type.values() ) {
|
||||
for ( final GenotypeType truth : GenotypeType.values() ) {
|
||||
for ( final GenotypeType called : GenotypeType.values() ) {
|
||||
final String field = String.format("n_true_%s_called_%s", truth, called);
|
||||
final Long value = count(truth, called);
|
||||
map.put(field, value.toString());
|
||||
|
|
@ -173,20 +174,20 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
}
|
||||
|
||||
// counts of called genotypes
|
||||
for ( final Genotype.Type called : Genotype.Type.values() ) {
|
||||
for ( final GenotypeType called : GenotypeType.values() ) {
|
||||
final String field = String.format("total_called_%s", called);
|
||||
final Long value = count(allGenotypes, called);
|
||||
map.put(field, value.toString());
|
||||
}
|
||||
|
||||
// counts of true genotypes
|
||||
for ( final Genotype.Type truth : Genotype.Type.values() ) {
|
||||
for ( final GenotypeType truth : GenotypeType.values() ) {
|
||||
final String field = String.format("total_true_%s", truth);
|
||||
final Long value = count(truth, allGenotypes);
|
||||
map.put(field, value.toString());
|
||||
}
|
||||
|
||||
for ( final Genotype.Type genotype : Genotype.Type.values() ) {
|
||||
for ( final GenotypeType genotype : GenotypeType.values() ) {
|
||||
final String field = String.format("percent_%s_called_%s", genotype, genotype);
|
||||
long numer = count(genotype, genotype);
|
||||
long denom = count(EnumSet.of(genotype), allGenotypes);
|
||||
|
|
@ -215,7 +216,7 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
// overall genotype concordance of sites called non-ref in eval track
|
||||
// MAD: this is the non-reference discrepancy rate
|
||||
final String field = "percent_non_reference_discrepancy_rate";
|
||||
long homrefConcords = count(Genotype.Type.HOM_REF, Genotype.Type.HOM_REF);
|
||||
long homrefConcords = count(GenotypeType.HOM_REF, GenotypeType.HOM_REF);
|
||||
long allNoHomRef = count(allCalledGenotypes, allCalledGenotypes) - homrefConcords;
|
||||
long numer = allNoHomRef - countDiag(allVariantGenotypes);
|
||||
long denom = count(allCalledGenotypes, allCalledGenotypes) - homrefConcords;
|
||||
|
|
|
|||
|
|
@ -121,9 +121,9 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
|
|||
int ac = 0;
|
||||
if ( vc.getNAlleles() > 2 ) {
|
||||
return SiteStatus.POLY;
|
||||
//// System.out.printf("multiple alleles %s = %s%n", vc.getAlleles(), vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY));
|
||||
//// System.out.printf("multiple alleles %s = %s%n", vc.getAlleles(), vc.getExtendedAttribute(VCFConstants.ALLELE_COUNT_KEY));
|
||||
// // todo -- omg this is painful. We need a better approach to dealing with multi-valued attributes
|
||||
// for ( String v : (List<String>)vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY) )
|
||||
// for ( String v : (List<String>)vc.getExtendedAttribute(VCFConstants.ALLELE_COUNT_KEY) )
|
||||
// ac += Integer.valueOf(v);
|
||||
//// System.out.printf(" ac = %d%n", ac);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -241,7 +241,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
|||
// update transition / transversion ratio
|
||||
if ( titvTable != null ) titvTable.inc(type, g.getSampleName());
|
||||
|
||||
if ( g.hasAttribute(VCFConstants.DEPTH_KEY) )
|
||||
if ( g.hasDP() )
|
||||
depthPerSample.inc(type, g.getSampleName());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -199,7 +199,7 @@ public class VariantEvalUtils {
|
|||
* @return a new VariantContext with just the requested samples
|
||||
*/
|
||||
public VariantContext getSubsetOfVariantContext(VariantContext vc, Set<String> sampleNames) {
|
||||
VariantContext vcsub = vc.subContextFromSamples(sampleNames, vc.getAlleles());
|
||||
VariantContext vcsub = vc.subContextFromSamples(sampleNames, false);
|
||||
VariantContextBuilder builder = new VariantContextBuilder(vcsub);
|
||||
|
||||
final int originalAlleleCount = vc.getHetCount() + 2 * vc.getHomVarCount();
|
||||
|
|
|
|||
|
|
@ -223,7 +223,7 @@ public class LeftAlignVariants extends RodWalker<Integer, Integer> {
|
|||
newA = Allele.NO_CALL;
|
||||
newAlleles.add(newA);
|
||||
}
|
||||
newGenotypes.add(Genotype.modifyAlleles(genotype, newAlleles));
|
||||
newGenotypes.add(new GenotypeBuilder(genotype).alleles(newAlleles).make());
|
||||
}
|
||||
|
||||
return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).referenceBaseForIndel(refBaseForIndel).make();
|
||||
|
|
|
|||
|
|
@ -315,6 +315,15 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
@Argument(fullName="fullyDecode", doc="If true, the incoming VariantContext will be fully decoded", required=false)
|
||||
private boolean fullyDecode = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="forceGenotypesDecode", doc="If true, the incoming VariantContext will have its genotypes forcibly decoded by computing AC across all genotypes. For efficiency testing only", required=false)
|
||||
private boolean forceGenotypesDecode = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="justRead", doc="If true, we won't actually write the output file. For efficiency testing only", required=false)
|
||||
private boolean justRead = false;
|
||||
|
||||
|
||||
/* Private class used to store the intermediate variants in the integer random selection process */
|
||||
private class RandomVariantStructure {
|
||||
private VariantContext vc;
|
||||
|
|
@ -392,11 +401,13 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
Collection<String> XLsamplesFromFile = SampleUtils.getSamplesFromFiles(XLsampleFiles);
|
||||
samples.removeAll(XLsamplesFromFile);
|
||||
samples.removeAll(XLsampleNames);
|
||||
NO_SAMPLES_SPECIFIED = NO_SAMPLES_SPECIFIED && XLsampleNames.isEmpty();
|
||||
|
||||
if ( samples.size() == 0 && !NO_SAMPLES_SPECIFIED )
|
||||
throw new UserException("All samples requested to be included were also requested to be excluded.");
|
||||
|
||||
for ( String sample : samples )
|
||||
if ( ! NO_SAMPLES_SPECIFIED )
|
||||
for ( String sample : samples )
|
||||
logger.info("Including sample '" + sample + "'");
|
||||
|
||||
// if user specified types to include, add these, otherwise, add all possible variant context types to list of vc types to include
|
||||
|
|
@ -494,7 +505,16 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
}
|
||||
|
||||
for (VariantContext vc : vcs) {
|
||||
if ( fullyDecode ) vc = vc.fullyDecode(vcfRods.get(vc.getSource()));
|
||||
// an option for performance testing only
|
||||
if ( fullyDecode )
|
||||
vc = vc.fullyDecode(vcfRods.get(vc.getSource()));
|
||||
|
||||
// an option for performance testing only
|
||||
if ( forceGenotypesDecode ) {
|
||||
final int x = vc.getCalledChrCount();
|
||||
//logger.info("forceGenotypesDecode with getCalledChrCount() = " + );
|
||||
}
|
||||
|
||||
if ( IDsToKeep != null && ! IDsToKeep.contains(vc.getID()) )
|
||||
continue;
|
||||
|
||||
|
|
@ -538,7 +558,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
if (!selectedTypes.contains(vc.getType()))
|
||||
continue;
|
||||
|
||||
VariantContext sub = subsetRecord(vc, samples, EXCLUDE_NON_VARIANTS);
|
||||
VariantContext sub = subsetRecord(vc, EXCLUDE_NON_VARIANTS);
|
||||
|
||||
if ( REGENOTYPE && sub.isPolymorphicInSamples() && hasPLs(sub) ) {
|
||||
final VariantContextBuilder builder = new VariantContextBuilder(UG_engine.calculateGenotypes(tracker, ref, context, sub)).filters(sub.getFiltersMaybeNull());
|
||||
|
|
@ -559,7 +579,8 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
randomlyAddVariant(++variantNumber, sub);
|
||||
}
|
||||
else if (!SELECT_RANDOM_FRACTION || ( GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) {
|
||||
vcfWriter.add(sub);
|
||||
if ( ! justRead )
|
||||
vcfWriter.add(sub);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -687,18 +708,14 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
* Helper method to subset a VC record, modifying some metadata stored in the INFO field (i.e. AN, AC, AF).
|
||||
*
|
||||
* @param vc the VariantContext record to subset
|
||||
* @param samples the samples to extract
|
||||
* @return the subsetted VariantContext
|
||||
*/
|
||||
private VariantContext subsetRecord(final VariantContext vc, final Set<String> samples, final boolean excludeNonVariants) {
|
||||
if ( samples == null || samples.isEmpty() )
|
||||
private VariantContext subsetRecord(final VariantContext vc, final boolean excludeNonVariants) {
|
||||
if ( NO_SAMPLES_SPECIFIED || samples.isEmpty() )
|
||||
return vc;
|
||||
|
||||
final VariantContext sub;
|
||||
if ( excludeNonVariants )
|
||||
sub = vc.subContextFromSamples(samples); // strip out the alternate alleles that aren't being used
|
||||
else
|
||||
sub = vc.subContextFromSamples(samples, vc.getAlleles());
|
||||
final VariantContext sub = vc.subContextFromSamples(samples, excludeNonVariants); // strip out the alternate alleles that aren't being used, but only when excludeNonVariants is set
|
||||
|
||||
VariantContextBuilder builder = new VariantContextBuilder(sub);
|
||||
|
||||
GenotypesContext newGC = sub.getGenotypes();
|
||||
|
|
@ -708,15 +725,13 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
newGC = VariantContextUtils.stripPLs(sub.getGenotypes());
|
||||
|
||||
//Remove a fraction of the genotypes if needed
|
||||
if(fractionGenotypes>0){
|
||||
if ( fractionGenotypes > 0 ){
|
||||
ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
|
||||
for ( Genotype genotype : newGC ) {
|
||||
//Set genotype to no call if it falls in the fraction.
|
||||
if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){
|
||||
ArrayList<Allele> alleles = new ArrayList<Allele>(2);
|
||||
alleles.add(Allele.create((byte)'.'));
|
||||
alleles.add(Allele.create((byte)'.'));
|
||||
genotypes.add(new Genotype(genotype.getSampleName(),alleles, Genotype.NO_LOG10_PERROR,genotype.getFilters(),new HashMap<String, Object>(),false));
|
||||
List<Allele> alleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||
genotypes.add(new GenotypeBuilder(genotype).alleles(alleles).noGQ().make());
|
||||
}
|
||||
else{
|
||||
genotypes.add(genotype);
|
||||
|
|
@ -750,14 +765,12 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
for (String sample : originalVC.getSampleNames()) {
|
||||
Genotype g = originalVC.getGenotype(sample);
|
||||
|
||||
if ( g.isNotFiltered() ) {
|
||||
|
||||
String dp = (String) g.getAttribute("DP");
|
||||
if (dp != null && ! dp.equals(VCFConstants.MISSING_DEPTH_v3) && ! dp.equals(VCFConstants.MISSING_VALUE_v4) ) {
|
||||
depth += Integer.valueOf(dp);
|
||||
}
|
||||
if ( ! g.isFiltered() ) {
|
||||
if ( g.hasDP() )
|
||||
depth += g.getDP();
|
||||
}
|
||||
}
|
||||
|
||||
builder.attribute("DP", depth);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -288,8 +288,8 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
|
|||
|
||||
private byte getStandardEncoding(Genotype g, int offset) {
|
||||
byte b;
|
||||
if ( g.hasAttribute(VCFConstants.GENOTYPE_QUALITY_KEY) && ((Integer) g.getAttribute(VCFConstants.GENOTYPE_QUALITY_KEY)) < minGenotypeQuality ) {
|
||||
b = NO_CALL;
|
||||
if ( g.hasGQ() && g.getGQ() < minGenotypeQuality ) {
|
||||
b = NO_CALL;
|
||||
} else if ( g.isHomRef() ) {
|
||||
b = HOM_REF;
|
||||
} else if ( g.isHomVar() ) {
|
||||
|
|
@ -305,7 +305,7 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
|
|||
|
||||
private byte getFlippedEncoding(Genotype g, int offset) {
|
||||
byte b;
|
||||
if ( g.hasAttribute(VCFConstants.GENOTYPE_QUALITY_KEY) && ((Integer) g.getAttribute(VCFConstants.GENOTYPE_QUALITY_KEY)) < minGenotypeQuality ) {
|
||||
if ( g.hasGQ() && g.getGQ() < minGenotypeQuality ) {
|
||||
b = NO_CALL;
|
||||
} else if ( g.isHomRef() ) {
|
||||
b = HOM_VAR;
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
|
|||
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
|
@ -314,8 +315,12 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
if ( addGenotypeFields ) {
|
||||
for ( final String sample : samples ) {
|
||||
for ( final String gf : genotypeFields ) {
|
||||
if ( vc.hasGenotype(sample) && vc.getGenotype(sample).hasAttribute(gf) )
|
||||
addFieldValue(vc.getGenotype(sample).getAttribute(gf), records);
|
||||
if ( vc.hasGenotype(sample) && vc.getGenotype(sample).hasAnyAttribute(gf) ) {
|
||||
if ( gf.equals(VCFConstants.GENOTYPE_KEY) )
|
||||
addFieldValue(vc.getGenotype(sample).getGenotypeString(true), records);
|
||||
else
|
||||
addFieldValue(vc.getGenotype(sample).getAnyAttribute(gf), records);
|
||||
}
|
||||
else
|
||||
addFieldValue(MISSING_DATA, records);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -132,7 +132,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
|
||||
// set the appropriate sample name if necessary
|
||||
if ( sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getName()) ) {
|
||||
Genotype g = Genotype.modifyName(vc.getGenotype(variants.getName()), sampleName);
|
||||
Genotype g = new GenotypeBuilder(vc.getGenotype(variants.getName())).name(sampleName).make();
|
||||
builder.genotypes(g);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.utils;
|
|||
|
||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeType;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -30,7 +31,7 @@ public class MendelianViolation {
|
|||
private boolean allCalledOnly = true;
|
||||
|
||||
//Stores occurrences of inheritance
|
||||
private EnumMap<Genotype.Type, EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>> inheritance;
|
||||
private EnumMap<GenotypeType, EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>> inheritance;
|
||||
|
||||
private int violations_total=0;
|
||||
|
||||
|
|
@ -74,119 +75,119 @@ public class MendelianViolation {
|
|||
|
||||
//Count of HomRef/HomRef/HomRef trios
|
||||
public int getRefRefRef(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
|
||||
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HOM_REF).get(GenotypeType.HOM_REF);
|
||||
}
|
||||
|
||||
//Count of HomVar/HomVar/HomVar trios
|
||||
public int getVarVarVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR);
|
||||
return inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_VAR);
|
||||
}
|
||||
|
||||
//Count of HomRef/HomVar/Het trios
|
||||
public int getRefVarHet(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET) +
|
||||
inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
|
||||
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HOM_VAR).get(GenotypeType.HET) +
|
||||
inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_REF).get(GenotypeType.HET);
|
||||
}
|
||||
|
||||
//Count of Het/Het/Het trios
|
||||
public int getHetHetHet(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET);
|
||||
return inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HET);
|
||||
}
|
||||
|
||||
//Count of Het/Het/HomRef trios
|
||||
public int getHetHetHomRef(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
|
||||
return inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HOM_REF);
|
||||
}
|
||||
|
||||
//Count of Het/Het/HomVar trios
|
||||
public int getHetHetHomVar(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR);
|
||||
return inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HOM_VAR);
|
||||
}
|
||||
|
||||
//Count of ref alleles inherited from Het/Het parents (no violation)
|
||||
public int getParentsHetHetInheritedRef(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||
+ 2*inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
|
||||
return inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HET)
|
||||
+ 2*inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HOM_REF);
|
||||
//return parentsHetHet_childRef;
|
||||
}
|
||||
|
||||
//Count of var alleles inherited from Het/Het parents (no violation)
|
||||
public int getParentsHetHetInheritedVar(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||
+ 2*inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR);
|
||||
return inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HET)
|
||||
+ 2*inheritance.get(GenotypeType.HET).get(GenotypeType.HET).get(GenotypeType.HOM_VAR);
|
||||
//return parentsHetHet_childVar;
|
||||
}
|
||||
|
||||
//Count of ref alleles inherited from HomRef/Het parents (no violation)
|
||||
public int getParentsRefHetInheritedRef(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF)
|
||||
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
|
||||
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HET).get(GenotypeType.HOM_REF)
|
||||
+ inheritance.get(GenotypeType.HET).get(GenotypeType.HOM_REF).get(GenotypeType.HOM_REF);
|
||||
//return parentsHomRefHet_childRef;
|
||||
}
|
||||
|
||||
//Count of var alleles inherited from HomRef/Het parents (no violation)
|
||||
public int getParentsRefHetInheritedVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
|
||||
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HET).get(GenotypeType.HET)
|
||||
+ inheritance.get(GenotypeType.HET).get(GenotypeType.HOM_REF).get(GenotypeType.HET);
|
||||
//return parentsHomRefHet_childVar;
|
||||
}
|
||||
|
||||
//Count of ref alleles inherited from HomVar/Het parents (no violation)
|
||||
public int getParentsVarHetInheritedRef(){
|
||||
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET);
|
||||
return inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HET).get(GenotypeType.HET)
|
||||
+ inheritance.get(GenotypeType.HET).get(GenotypeType.HOM_VAR).get(GenotypeType.HET);
|
||||
//return parentsHomVarHet_childRef;
|
||||
}
|
||||
|
||||
//Count of var alleles inherited from HomVar/Het parents (no violation)
|
||||
public int getParentsVarHetInheritedVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR)
|
||||
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR);
|
||||
return inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HET).get(GenotypeType.HOM_VAR)
|
||||
+ inheritance.get(GenotypeType.HET).get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_VAR);
|
||||
//return parentsHomVarHet_childVar;
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_REF/HOM_REF -> HOM_VAR
|
||||
public int getParentsRefRefChildVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
|
||||
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HOM_REF).get(GenotypeType.HOM_VAR);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_REF/HOM_REF -> HET
|
||||
public int getParentsRefRefChildHet(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
|
||||
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HOM_REF).get(GenotypeType.HET);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_REF/HET -> HOM_VAR
|
||||
public int getParentsRefHetChildVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR)
|
||||
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
|
||||
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HET).get(GenotypeType.HOM_VAR)
|
||||
+ inheritance.get(GenotypeType.HET).get(GenotypeType.HOM_REF).get(GenotypeType.HOM_VAR);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_REF/HOM_VAR -> HOM_VAR
|
||||
public int getParentsRefVarChildVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR)
|
||||
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
|
||||
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_VAR)
|
||||
+ inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_REF).get(GenotypeType.HOM_VAR);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_REF/HOM_VAR -> HOM_REF
|
||||
public int getParentsRefVarChildRef(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF)
|
||||
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
|
||||
return inheritance.get(GenotypeType.HOM_REF).get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_REF)
|
||||
+ inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_REF).get(GenotypeType.HOM_REF);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_VAR/HET -> HOM_REF
|
||||
public int getParentsVarHetChildRef(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF)
|
||||
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
|
||||
return inheritance.get(GenotypeType.HET).get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_REF)
|
||||
+ inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HET).get(GenotypeType.HOM_REF);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_VAR/HOM_VAR -> HOM_REF
|
||||
public int getParentsVarVarChildRef(){
|
||||
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF);
|
||||
return inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_REF);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_VAR/HOM_VAR -> HET
|
||||
public int getParentsVarVarChildHet(){
|
||||
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET);
|
||||
return inheritance.get(GenotypeType.HOM_VAR).get(GenotypeType.HOM_VAR).get(GenotypeType.HET);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -362,12 +363,12 @@ public class MendelianViolation {
|
|||
|
||||
private void createInheritanceMap(){
|
||||
|
||||
inheritance = new EnumMap<Genotype.Type,EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>>(Genotype.Type.class);
|
||||
for(Genotype.Type mType : Genotype.Type.values()){
|
||||
inheritance.put(mType, new EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>(Genotype.Type.class));
|
||||
for(Genotype.Type dType : Genotype.Type.values()){
|
||||
inheritance.get(mType).put(dType, new EnumMap<Genotype.Type,Integer>(Genotype.Type.class));
|
||||
for(Genotype.Type cType : Genotype.Type.values()){
|
||||
inheritance = new EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>>(GenotypeType.class);
|
||||
for(GenotypeType mType : GenotypeType.values()){
|
||||
inheritance.put(mType, new EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>(GenotypeType.class));
|
||||
for(GenotypeType dType : GenotypeType.values()){
|
||||
inheritance.get(mType).put(dType, new EnumMap<GenotypeType,Integer>(GenotypeType.class));
|
||||
for(GenotypeType cType : GenotypeType.values()){
|
||||
inheritance.get(mType).get(dType).put(cType, 0);
|
||||
}
|
||||
}
|
||||
|
|
@ -376,9 +377,9 @@ public class MendelianViolation {
|
|||
}
|
||||
|
||||
private void clearInheritanceMap(){
|
||||
for(Genotype.Type mType : Genotype.Type.values()){
|
||||
for(Genotype.Type dType : Genotype.Type.values()){
|
||||
for(Genotype.Type cType : Genotype.Type.values()){
|
||||
for(GenotypeType mType : GenotypeType.values()){
|
||||
for(GenotypeType dType : GenotypeType.values()){
|
||||
for(GenotypeType cType : GenotypeType.values()){
|
||||
inheritance.get(mType).get(dType).put(cType, 0);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -225,9 +225,9 @@ public class SequenceDictionaryUtils {
|
|||
return false;
|
||||
|
||||
// todo -- reenable if we want to be really strict here
|
||||
// if (me.getAttribute(SAMSequenceRecord.MD5_TAG) != null && that.getAttribute(SAMSequenceRecord.MD5_TAG) != null) {
|
||||
// final BigInteger thisMd5 = new BigInteger((String)me.getAttribute(SAMSequenceRecord.MD5_TAG), 16);
|
||||
// final BigInteger thatMd5 = new BigInteger((String)that.getAttribute(SAMSequenceRecord.MD5_TAG), 16);
|
||||
// if (me.getExtendedAttribute(SAMSequenceRecord.MD5_TAG) != null && that.getExtendedAttribute(SAMSequenceRecord.MD5_TAG) != null) {
|
||||
// final BigInteger thisMd5 = new BigInteger((String)me.getExtendedAttribute(SAMSequenceRecord.MD5_TAG), 16);
|
||||
// final BigInteger thatMd5 = new BigInteger((String)that.getExtendedAttribute(SAMSequenceRecord.MD5_TAG), 16);
|
||||
// if (!thisMd5.equals(thatMd5)) {
|
||||
// return false;
|
||||
// }
|
||||
|
|
|
|||
|
|
@ -223,6 +223,20 @@ public class Utils {
|
|||
return ret.toString();
|
||||
}
|
||||
|
||||
/**
 * Joins the values of an int array into a single string, placing the separator between
 * consecutive values (no leading or trailing separator).
 *
 * @param separator the text appended between consecutive values
 * @param ints the values to join; may be null or empty
 * @return the joined string, or "" when ints is null or has no elements
 */
public static String join(String separator, int[] ints) {
|
||||
if ( ints == null || ints.length == 0)
|
||||
return "";
|
||||
else {
|
||||
StringBuilder ret = new StringBuilder();
|
||||
// first value goes in without a separator; every later value is preceded by one
ret.append(ints[0]);
|
||||
for (int i = 1; i < ints.length; ++i) {
|
||||
ret.append(separator);
|
||||
ret.append(ints[i]);
|
||||
}
|
||||
return ret.toString();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a string of the form elt1.toString() [sep elt2.toString() ... sep elt.toString()] for a collection of
|
||||
* elti objects (note there's no actual space between sep and the elti elements). Returns
|
||||
|
|
|
|||
|
|
@ -24,6 +24,8 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.Feature;
|
||||
|
|
@ -33,9 +35,7 @@ import org.broad.tribble.readers.AsciiLineReader;
|
|||
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
|
|
@ -45,15 +45,45 @@ import java.io.FileNotFoundException;
|
|||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDependentFeatureCodec {
|
||||
/**
|
||||
* Decode BCF2 files
|
||||
*/
|
||||
public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDependentFeatureCodec {
|
||||
final protected static Logger logger = Logger.getLogger(BCF2Codec.class);
|
||||
private VCFHeader header = null;
|
||||
|
||||
/**
|
||||
* Maps offsets (encoded in BCF) into contig names (from header) for the CHROM field
|
||||
*/
|
||||
private final ArrayList<String> contigNames = new ArrayList<String>();
|
||||
|
||||
/**
|
||||
* Maps header string names (encoded in VCF) into strings found in the BCF header
|
||||
*
|
||||
* Initialized when processing the header
|
||||
*/
|
||||
private ArrayList<String> dictionary;
|
||||
|
||||
/**
|
||||
* Our decoder that reads low-level objects from the BCF2 records
|
||||
*/
|
||||
private final BCF2Decoder decoder = new BCF2Decoder();
|
||||
private boolean skipGenotypes = false;
|
||||
|
||||
/**
|
||||
* Provides some sanity checking on the header
|
||||
*/
|
||||
private final static int MAX_HEADER_SIZE = 0x08000000;
|
||||
|
||||
/**
|
||||
* Genotype field decoders that are initialized when the header is read
|
||||
*/
|
||||
private BCF2GenotypeFieldDecoders gtFieldDecoders = null;
|
||||
|
||||
// for error handling
|
||||
private int recordNo = 0;
|
||||
private int pos = 0;
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Feature codec interface functions
|
||||
|
|
@ -62,28 +92,30 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
|||
|
||||
/**
 * Decode only the location (contig, start, stop) of the next record in the stream.
 *
 * Reads the sites and genotypes block sizes, loads the sites block, decodes the site
 * location into a fresh builder, and then consumes the genotypes block so that the
 * stream is positioned at the block sizes of the following record.
 *
 * @param inputStream the stream positioned at the start of a record
 * @return a feature carrying only the location of the record
 */
@Override
|
||||
public Feature decodeLoc( final PositionalBufferedStream inputStream ) {
|
||||
return decode(inputStream);
|
||||
// TODO: a less expensive version of decodeLoc() that doesn't use VariantContext
|
||||
// TODO: very easy -- just decode the site location, and then skip to the end of the sites block
|
||||
// TODO: and then skip (rather than read) the genotypes block
|
||||
recordNo++;
|
||||
final VariantContextBuilder builder = new VariantContextBuilder();
|
||||
|
||||
final int sitesBlockSize = decoder.readBlockSize(inputStream);
|
||||
final int genotypeBlockSize = decoder.readBlockSize(inputStream); // necessary because it's in the stream
|
||||
decoder.readNextBlock(sitesBlockSize, inputStream);
|
||||
decodeSiteLoc(builder);
|
||||
// The genotypes block must be consumed as well: every record starts with its two block
// sizes, so leaving these bytes unread would make the next call misread genotype data
// as block sizes. Must happen after decodeSiteLoc, since it replaces the decoder's bytes.
decoder.readNextBlock(genotypeBlockSize, inputStream);
|
||||
|
||||
return builder.fullyDecoded(true).make();
|
||||
}
|
||||
|
||||
/**
 * Decode the next record in the stream into a VariantContext.
 *
 * Reads the sites and genotypes block sizes, loads the sites block and decodes the
 * site location and the extended site information from it, then loads the genotypes
 * block and attaches a lazy genotypes decoder to the builder.
 *
 * @param inputStream the stream positioned at the start of a record
 * @return the VariantContext built from the record, marked as fully decoded
 */
@Override
|
||||
public VariantContext decode( final PositionalBufferedStream inputStream ) {
|
||||
// counts records seen so far
recordNo++;
|
||||
final VariantContextBuilder builder = new VariantContextBuilder();
|
||||
|
||||
// both block sizes are stored before the blocks themselves
final int sitesBlockSize = decoder.readBlockSize(inputStream);
|
||||
final int genotypeBlockSize = decoder.readBlockSize(inputStream);
|
||||
decoder.readNextBlock(sitesBlockSize, inputStream);
|
||||
final SitesInfoForDecoding info = decodeSitesBlock(builder);
|
||||
|
||||
if ( isSkippingGenotypes() ) {
|
||||
decoder.skipNextBlock(genotypeBlockSize, inputStream);
|
||||
} else {
|
||||
decoder.readNextBlock(genotypeBlockSize, inputStream);
|
||||
decodeGenotypes(info, builder);
|
||||
}
|
||||
decodeSiteLoc(builder);
|
||||
final SitesInfoForDecoding info = decodeSitesExtendedInfo(builder);
|
||||
|
||||
// the sites block has been decoded above, so the decoder can now be pointed at the genotypes block
decoder.readNextBlock(genotypeBlockSize, inputStream);
|
||||
createLazyGenotypesDecoder(info, builder);
|
||||
return builder.fullyDecoded(true).make();
|
||||
}
|
||||
|
||||
|
|
@ -97,16 +129,16 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
|||
try {
|
||||
// note that this reads the magic as well, and so does double duty
|
||||
if ( ! BCF2Utils.startsWithBCF2Magic(inputStream) )
|
||||
throw new UserException.MalformedBCF2("Input stream does not begin with BCF2 magic");
|
||||
error("Input stream does not begin with BCF2 magic");
|
||||
|
||||
final int headerSizeInBytes = BCF2Utils.readInt(BCF2Type.INT32.getSizeInBytes(), inputStream);
|
||||
|
||||
if ( headerSizeInBytes <= 0 || headerSizeInBytes > MAX_HEADER_SIZE) // no bigger than 128 MB (MAX_HEADER_SIZE = 0x08000000 bytes)
|
||||
throw new UserException.MalformedBCF2("BCF2 header has invalid length: " + headerSizeInBytes + " must be >= 0 and < "+ MAX_HEADER_SIZE);
|
||||
error("BCF2 header has invalid length: " + headerSizeInBytes + " must be >= 0 and < "+ MAX_HEADER_SIZE);
|
||||
|
||||
final byte[] headerBytes = new byte[headerSizeInBytes];
|
||||
if ( inputStream.read(headerBytes) != headerSizeInBytes )
|
||||
throw new UserException.MalformedBCF2("Couldn't read all of the bytes specified in the header length = " + headerSizeInBytes);
|
||||
error("Couldn't read all of the bytes specified in the header length = " + headerSizeInBytes);
|
||||
|
||||
final PositionalBufferedStream bps = new PositionalBufferedStream(new ByteArrayInputStream(headerBytes));
|
||||
final AsciiLineReader headerReader = new AsciiLineReader(bps);
|
||||
|
|
@ -118,12 +150,24 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
|||
}
|
||||
|
||||
// create the contig offsets
|
||||
for ( final VCFContigHeaderLine contig : header.getContigLines())
|
||||
contigNames.add(contig.getID());
|
||||
if ( ! header.getContigLines().isEmpty() ) {
|
||||
logger.info("Found contig lines in BCF2 file, using those");
|
||||
contigNames.clear();
|
||||
for ( final VCFContigHeaderLine contig : header.getContigLines()) {
|
||||
if ( contig.getID() == null || contig.getID().equals("") )
|
||||
error("found a contig with an invalid ID " + contig);
|
||||
contigNames.add(contig.getID());
|
||||
}
|
||||
} else {
|
||||
logger.info("Didn't find any contig lines in BCF2 file, falling back (dangerously) to GATK reference dictionary");
|
||||
}
|
||||
|
||||
// create the string dictionary
|
||||
dictionary = parseDictionary(header);
|
||||
|
||||
// prepare the genotype field decoders
|
||||
gtFieldDecoders = new BCF2GenotypeFieldDecoders(header);
|
||||
|
||||
// position right before next line (would be right before first real record byte at end of header)
|
||||
return new FeatureCodecHeader(header, inputStream.getPosition());
|
||||
}
|
||||
|
|
@ -153,7 +197,6 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
|||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
|
||||
@Override
|
||||
public void setGenomeLocParser(final GenomeLocParser genomeLocParser) {
|
||||
// initialize contigNames to standard ones in reference
|
||||
|
|
@ -161,14 +204,6 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
|||
contigNames.add(contig.getSequenceName());
|
||||
}
|
||||
|
||||
public boolean isSkippingGenotypes() {
|
||||
return skipGenotypes;
|
||||
}
|
||||
|
||||
public void setSkipGenotypes(final boolean skipGenotypes) {
|
||||
this.skipGenotypes = skipGenotypes;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// implicit block
|
||||
|
|
@ -182,50 +217,83 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
|||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private final SitesInfoForDecoding decodeSitesBlock(final VariantContextBuilder builder) {
|
||||
final int contigOffset = decoder.decodeInt(BCF2Type.INT32.getSizeInBytes());
|
||||
/**
|
||||
* Decode the site location (contig, start and stop) from this class's decoder and store it in the builder.
|
||||
*
* Reads, in order, the contig offset, the position and the reference length as INT32
* values. The position is also stored in this.pos.
*
|
||||
* @param builder the builder that receives the contig, start and stop; must not be null
|
||||
* (nothing is returned; the results are stored in the builder)
|
||||
*/
|
||||
@Requires({"builder != null"})
|
||||
private final void decodeSiteLoc(final VariantContextBuilder builder) {
|
||||
final int contigOffset = decoder.decodeInt(BCF2Type.INT32);
|
||||
// the record stores an offset into the contig list, not the contig name itself
final String contig = lookupContigName(contigOffset);
|
||||
builder.chr(contig);
|
||||
|
||||
final int pos = decoder.decodeInt(BCF2Type.INT32.getSizeInBytes());
|
||||
final int refLength = decoder.decodeInt(BCF2Type.INT32.getSizeInBytes());
|
||||
this.pos = decoder.decodeInt(BCF2Type.INT32);
|
||||
final int refLength = decoder.decodeInt(BCF2Type.INT32);
|
||||
builder.start((long)pos);
|
||||
builder.stop((long)(pos + refLength - 1)); // minus one because of our open intervals
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode the remaining sites level data from this class's decoder into the builder.
|
||||
*
* In order: the quality (divided by -10.0 to give log10PError, when present), the packed
* allele/info counts, the packed format-field/sample counts, then the ID, alleles,
* filters and INFO fields. Uses this.pos as the position passed to decodeAlleles.
*
|
||||
* @param builder the builder that receives the decoded site data; must not be null
|
||||
* @return the format field count, sample count and alleles of this record; error() is called if it is not valid
|
||||
*/
|
||||
@Requires({"builder != null", "decoder != null"})
|
||||
@Ensures({"result != null", "result.isValid()"})
|
||||
private final SitesInfoForDecoding decodeSitesExtendedInfo(final VariantContextBuilder builder) {
|
||||
final Object qual = decoder.decodeSingleValue(BCF2Type.FLOAT);
|
||||
if ( qual != null ) {
|
||||
builder.log10PError(((Double)qual) / -10.0);
|
||||
}
|
||||
|
||||
final int nAlleleInfo = decoder.decodeInt(BCF2Type.INT32.getSizeInBytes());
|
||||
final int nFormatSamples = decoder.decodeInt(BCF2Type.INT32.getSizeInBytes());
|
||||
final int nAlleleInfo = decoder.decodeInt(BCF2Type.INT32);
|
||||
final int nFormatSamples = decoder.decodeInt(BCF2Type.INT32);
|
||||
// nAlleleInfo packs the allele count in the high 16 bits and the info count in the low 16 bits
final int nAlleles = nAlleleInfo >> 16;
|
||||
final int nInfo = nAlleleInfo & 0x00FF;
|
||||
final int nFormatFields = nFormatSamples >> 24;
|
||||
final int nSamples = nFormatSamples & 0x0FFF;
|
||||
final int nInfo = nAlleleInfo & 0x0000FFFF;
|
||||
final int nFormatFields = nFormatSamples >> 24;
|
||||
// the format field count is taken from the bits above bit 24, so the sample count is the
// full low 24 bits: the mask needs six F digits (0x00FFFFFF), not five
final int nSamples = nFormatSamples & 0x00FFFFFF;
|
||||
|
||||
decodeID(builder);
|
||||
final ArrayList<Allele> alleles = decodeAlleles(builder, pos, nAlleles);
|
||||
decodeFilter(builder);
|
||||
decodeInfo(builder, nInfo);
|
||||
|
||||
return new SitesInfoForDecoding(pos, nFormatFields, nSamples, alleles);
|
||||
final SitesInfoForDecoding info = new SitesInfoForDecoding(nFormatFields, nSamples, alleles);
|
||||
if ( ! info.isValid() )
|
||||
error("Sites info is malformed: " + info);
|
||||
return info;
|
||||
}
|
||||
|
||||
/**
 * Holder for the per-record values produced while decoding the sites block that are
 * still needed afterwards: the number of format fields, the number of samples and the
 * alleles of the record.
 */
private final static class SitesInfoForDecoding {
|
||||
final int pos;
|
||||
protected final static class SitesInfoForDecoding {
|
||||
final int nFormatFields;
|
||||
final int nSamples;
|
||||
final ArrayList<Allele> alleles;
|
||||
|
||||
private SitesInfoForDecoding(final int pos, final int nFormatFields, final int nSamples, final ArrayList<Allele> alleles) {
|
||||
this.pos = pos;
|
||||
private SitesInfoForDecoding(final int nFormatFields, final int nSamples, final ArrayList<Allele> alleles) {
|
||||
this.nFormatFields = nFormatFields;
|
||||
this.nSamples = nSamples;
|
||||
this.alleles = alleles;
|
||||
}
|
||||
|
||||
/**
 * @return true when both counts are non-negative and the allele list is non-null,
 *         non-empty and starts with a reference allele
 */
public boolean isValid() {
|
||||
return nFormatFields >= 0 &&
|
||||
nSamples >= 0 &&
|
||||
alleles != null && ! alleles.isEmpty() && alleles.get(0).isReference();
|
||||
}
|
||||
|
||||
/** @return the two counts and the alleles as text (used when reporting a malformed record) */
@Override
|
||||
public String toString() {
|
||||
return String.format("nFormatFields = %d, nSamples = %d, alleles = %s", nFormatFields, nSamples, alleles);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode the id field in this BCF2 file and store it in the builder
|
||||
* @param builder
|
||||
*/
|
||||
private void decodeID( final VariantContextBuilder builder ) {
|
||||
final String id = (String)decoder.decodeTypedValue();
|
||||
|
||||
|
|
@ -235,6 +303,15 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
|||
builder.id(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Annoying routine that deals with allele clipping from the BCF2 encoding to the standard
|
||||
* GATK encoding.
|
||||
*
|
||||
* @param position
|
||||
* @param ref
|
||||
* @param unclippedAlleles
|
||||
* @return
|
||||
*/
|
||||
protected static ArrayList<Allele> clipAllelesIfNecessary(int position, String ref, ArrayList<Allele> unclippedAlleles) {
|
||||
if ( ! AbstractVCFCodec.isSingleNucleotideEvent(unclippedAlleles) ) {
|
||||
ArrayList<Allele> clippedAlleles = new ArrayList<Allele>(unclippedAlleles.size());
|
||||
|
|
@ -244,6 +321,14 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
|||
return unclippedAlleles;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode the alleles from this BCF2 file and put the results in builder
|
||||
* @param builder
|
||||
* @param pos
|
||||
* @param nAlleles
|
||||
* @return the alleles
|
||||
*/
|
||||
@Requires("nAlleles > 0")
|
||||
private ArrayList<Allele> decodeAlleles( final VariantContextBuilder builder, final int pos, final int nAlleles ) {
|
||||
// TODO -- probably need inline decoder for efficiency here (no sense in going bytes -> string -> vector -> bytes
|
||||
ArrayList<Allele> alleles = new ArrayList<Allele>(nAlleles);
|
||||
|
|
@ -259,15 +344,21 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
|||
alleles.add(Allele.create(allele, false));
|
||||
}
|
||||
}
|
||||
assert ref != null;
|
||||
|
||||
alleles = clipAllelesIfNecessary(pos, ref, alleles);
|
||||
builder.alleles(alleles);
|
||||
|
||||
assert ref.length() > 0;
|
||||
builder.referenceBaseForIndel(ref.getBytes()[0]);
|
||||
|
||||
return alleles;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode the filter field of this BCF2 file and store the result in the builder
|
||||
* @param builder
|
||||
*/
|
||||
private void decodeFilter( final VariantContextBuilder builder ) {
|
||||
final Object value = decoder.decodeTypedValue();
|
||||
|
||||
|
|
@ -275,17 +366,28 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
|||
builder.unfiltered();
|
||||
else {
|
||||
if ( value instanceof Integer )
|
||||
// fast path for single integer result
|
||||
builder.filter(getDictionaryString((Integer)value));
|
||||
else {
|
||||
for ( int offset : (List<Integer>)value )
|
||||
for ( final int offset : (List<Integer>)value )
|
||||
builder.filter(getDictionaryString(offset));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Loop over the info field key / value pairs in this BCF2 file and decode them into the builder
|
||||
*
|
||||
* @param builder
|
||||
* @param numInfoFields
|
||||
*/
|
||||
@Requires("numInfoFields >= 0")
|
||||
private void decodeInfo( final VariantContextBuilder builder, final int numInfoFields ) {
|
||||
final Map<String, Object> infoFieldEntries = new HashMap<String, Object>(numInfoFields);
|
||||
if ( numInfoFields == 0 )
|
||||
// fast path, don't bother doing any work if there are no fields
|
||||
return;
|
||||
|
||||
final Map<String, Object> infoFieldEntries = new HashMap<String, Object>(numInfoFields);
|
||||
for ( int i = 0; i < numInfoFields; i++ ) {
|
||||
final String key = getDictionaryString();
|
||||
Object value = decoder.decodeTypedValue();
|
||||
|
|
@ -297,143 +399,98 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
|||
builder.attributes(infoFieldEntries);
|
||||
}
|
||||
|
||||
private void decodeGenotypes( final SitesInfoForDecoding siteInfo, final VariantContextBuilder builder ) {
|
||||
final List<String> samples = new ArrayList<String>(header.getGenotypeSamples());
|
||||
final int nSamples = siteInfo.nSamples;
|
||||
final int nFields = siteInfo.nFormatFields;
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Decoding Genotypes
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
if ( samples.size() != nSamples )
|
||||
throw new UserException.MalformedBCF2("GATK currently doesn't support reading BCF2 files with " +
|
||||
"different numbers of samples per record. Saw " + samples.size() +
|
||||
" samples in header but have a record with " + nSamples + " samples");
|
||||
/**
|
||||
* Create the lazy loader for the genotypes data, and store it in the builder
|
||||
* so that the VC will be able to decode on demand the genotypes data
|
||||
*
|
||||
* @param siteInfo
|
||||
* @param builder
|
||||
*/
|
||||
private void createLazyGenotypesDecoder( final SitesInfoForDecoding siteInfo,
|
||||
final VariantContextBuilder builder ) {
|
||||
if (siteInfo.nSamples > 0) {
|
||||
final LazyGenotypesContext.LazyParser lazyParser =
|
||||
new BCF2LazyGenotypesDecoder(this, siteInfo.alleles, siteInfo.nSamples, siteInfo.nFormatFields);
|
||||
final int nGenotypes = header.getGenotypeSamples().size();
|
||||
LazyGenotypesContext lazy = new LazyGenotypesContext(lazyParser,
|
||||
new LazyData(siteInfo.nFormatFields, decoder.getRecordBytes()),
|
||||
nGenotypes);
|
||||
|
||||
final Map<String, List<Object>> fieldValues = decodeGenotypeFieldValues(nFields, nSamples);
|
||||
final List<Genotype> genotypes = new ArrayList<Genotype>(nSamples);
|
||||
for ( int i = 0; i < nSamples; i++ ) {
|
||||
// all of the information we need for each genotype, with default values
|
||||
final String sampleName = samples.get(i);
|
||||
List<Allele> alleles = null;
|
||||
boolean isPhased = false;
|
||||
double log10PError = VariantContext.NO_LOG10_PERROR;
|
||||
Set<String> filters = null;
|
||||
Map<String, Object> attributes = null;
|
||||
double[] log10Likelihoods = null;
|
||||
// did we resort the sample names? If so, we need to load the genotype data
|
||||
if ( !header.samplesWereAlreadySorted() )
|
||||
lazy.decode();
|
||||
|
||||
for ( final Map.Entry<String, List<Object>> entry : fieldValues.entrySet() ) {
|
||||
final String field = entry.getKey();
|
||||
Object value = entry.getValue().get(i);
|
||||
try {
|
||||
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||
alleles = decodeGenotypeAlleles(siteInfo.alleles, (List<Integer>)value);
|
||||
} else if ( field.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
|
||||
if ( value != BCF2Type.INT8.getMissingJavaValue() )
|
||||
log10PError = ((Integer)value) / -10.0;
|
||||
} else if ( field.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) ) {
|
||||
final List<Integer> pls = (List<Integer>)value;
|
||||
if ( pls != null ) { // we have a PL field
|
||||
log10Likelihoods = new double[pls.size()];
|
||||
for ( int j = 0; j < log10Likelihoods.length; j++ ) {
|
||||
final double d = pls.get(j);
|
||||
log10Likelihoods[j] = d == -0.0 ? 0.0 : d / -10.0;
|
||||
}
|
||||
}
|
||||
} else if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
|
||||
throw new ReviewedStingException("Genotype filters not implemented in GATK BCF2");
|
||||
//filters = new HashSet<String>(values.get(i));
|
||||
} else { // add to attributes
|
||||
if ( value != null ) { // don't add missing values
|
||||
if ( attributes == null ) attributes = new HashMap<String, Object>(nFields);
|
||||
if ( value instanceof List && ((List)value).size() == 1)
|
||||
value = ((List)value).get(0);
|
||||
attributes.put(field, value);
|
||||
}
|
||||
}
|
||||
} catch ( ClassCastException e ) {
|
||||
throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field
|
||||
+ " inconsistent with the value observed in the decoded value in the "
|
||||
+ " BCF file. Value was " + value);
|
||||
}
|
||||
}
|
||||
|
||||
if ( alleles == null ) throw new UserException.MalformedBCF2("BUG: no alleles found");
|
||||
|
||||
final Genotype g = new Genotype(sampleName, alleles, log10PError, filters, attributes, isPhased, log10Likelihoods);
|
||||
genotypes.add(g);
|
||||
}
|
||||
|
||||
builder.genotypes(genotypes);
|
||||
}
|
||||
|
||||
private final List<Allele> decodeGenotypeAlleles(final ArrayList<Allele> siteAlleles, final List<Integer> encoded) {
|
||||
if ( encoded == null )
|
||||
// no called sample GT = .
|
||||
return Collections.emptyList();
|
||||
else {
|
||||
// we have at least some alleles to decode
|
||||
final List<Allele> gt = new ArrayList<Allele>(encoded.size());
|
||||
for ( final Integer encode : encoded ) {
|
||||
if ( encode == null ) // absent, as are all following by definition
|
||||
return gt;
|
||||
else {
|
||||
final int offset = encode >> 1;
|
||||
if ( offset == 0 )
|
||||
gt.add(Allele.NO_CALL);
|
||||
else
|
||||
gt.add(siteAlleles.get(offset - 1));
|
||||
}
|
||||
}
|
||||
|
||||
return gt;
|
||||
builder.genotypesNoValidation(lazy);
|
||||
}
|
||||
}
|
||||
|
||||
private final Map<String, List<Object>> decodeGenotypeFieldValues(final int nFields, final int nSamples) {
|
||||
assert (nFields > 0 && nSamples > 0) || (nFields == 0 && nSamples == 0);
|
||||
public static class LazyData {
|
||||
final public int nGenotypeFields;
|
||||
final public byte[] bytes;
|
||||
|
||||
if ( nFields == 0 ) // fast path exit for sites only file
|
||||
return Collections.emptyMap();
|
||||
else {
|
||||
final Map<String, List<Object>> map = new LinkedHashMap<String, List<Object>>(nFields);
|
||||
|
||||
for ( int i = 0; i < nFields; i++ ) {
|
||||
final String field = getDictionaryString();
|
||||
final byte typeDescriptor = decoder.readTypeDescriptor();
|
||||
final List<Object> values = new ArrayList<Object>(nSamples);
|
||||
for ( int j = 0; j < nSamples; j++ )
|
||||
values.add(decoder.decodeTypedValue(typeDescriptor));
|
||||
map.put(field, values);
|
||||
}
|
||||
|
||||
return map;
|
||||
@Requires({"nGenotypeFields > 0", "bytes != null"})
|
||||
public LazyData(final int nGenotypeFields, final byte[] bytes) {
|
||||
this.nGenotypeFields = nGenotypeFields;
|
||||
this.bytes = bytes;
|
||||
}
|
||||
}
|
||||
|
||||
@Ensures("result != null")
|
||||
private final String getDictionaryString() {
|
||||
return getDictionaryString((Integer) decoder.decodeTypedValue());
|
||||
}
|
||||
|
||||
private final String getDictionaryString(final int offset) {
|
||||
if ( offset >= dictionary.size() ) throw new UserException.MalformedBCF2("BUG: no dictionary field found at offset " + offset);
|
||||
final String field = dictionary.get(offset);
|
||||
return field;
|
||||
@Requires("offset < dictionary.size()")
|
||||
@Ensures("result != null")
|
||||
protected final String getDictionaryString(final int offset) {
|
||||
return dictionary.get(offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Translate the contig offset as encoded in the BCF file into the actual string
|
||||
* name of the contig from the dictionary
|
||||
*
|
||||
* @param contigOffset
|
||||
* @return
|
||||
*/
|
||||
@Requires({"contigOffset >= 0", "contigOffset < contigNames.size()"})
|
||||
@Ensures("result != null")
|
||||
private final String lookupContigName( final int contigOffset ) {
|
||||
if ( contigOffset < contigNames.size() ) {
|
||||
return contigNames.get(contigOffset);
|
||||
}
|
||||
else {
|
||||
throw new UserException.MalformedBCF2(String.format("No contig at index %d present in the sequence dictionary from the BCF2 header (%s)", contigOffset, contigNames));
|
||||
}
|
||||
return contigNames.get(contigOffset);
|
||||
}
|
||||
|
||||
@Requires("header != null")
|
||||
@Ensures({"result != null", "! result.isEmpty()"})
|
||||
private final ArrayList<String> parseDictionary(final VCFHeader header) {
|
||||
final ArrayList<String> dict = BCF2Utils.makeDictionary(header);
|
||||
|
||||
// if we got here we never found a dictionary, or there are no elements in the dictionary
|
||||
if ( dict.size() == 0 )
|
||||
throw new UserException.MalformedBCF2("Dictionary header element was absent or empty");
|
||||
if ( dict.isEmpty() )
|
||||
error("Dictionary header element was absent or empty");
|
||||
|
||||
return dict;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the VCFHeader we found in this BCF2 file
|
||||
*/
|
||||
protected VCFHeader getHeader() {
|
||||
return header;
|
||||
}
|
||||
|
||||
@Requires("field != null")
|
||||
@Ensures("result != null")
|
||||
protected BCF2GenotypeFieldDecoders.Decoder getGenotypeFieldDecoder(final String field) {
|
||||
return gtFieldDecoders.getDecoder(field);
|
||||
}
|
||||
|
||||
private final void error(final String message) throws RuntimeException {
|
||||
throw new UserException.MalformedBCF2(String.format("At record %d with position %d:", recordNo, pos, message));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,6 +24,8 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -33,12 +35,13 @@ import java.io.ByteArrayInputStream;
|
|||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
||||
public class BCF2Decoder {
|
||||
public final class BCF2Decoder {
|
||||
final protected static Logger logger = Logger.getLogger(FeatureCodec.class);
|
||||
|
||||
byte[] recordBytes;
|
||||
ByteArrayInputStream recordStream;
|
||||
byte[] recordBytes = null;
|
||||
ByteArrayInputStream recordStream = null;
|
||||
|
||||
public BCF2Decoder() {
|
||||
// nothing to do
|
||||
|
|
@ -66,6 +69,7 @@ public class BCF2Decoder {
|
|||
* @return
|
||||
*/
|
||||
public void readNextBlock(final int blockSizeInBytes, final InputStream stream) {
|
||||
if ( blockSizeInBytes < 0 ) throw new UserException.MalformedBCF2("Invalid block size " + blockSizeInBytes);
|
||||
setRecordBytes(readRecordBytes(blockSizeInBytes, stream));
|
||||
}
|
||||
|
||||
|
|
@ -112,9 +116,9 @@ public class BCF2Decoder {
|
|||
*
|
||||
* @param recordBytes
|
||||
*/
|
||||
@Requires("recordBytes != null")
|
||||
@Ensures({"this.recordBytes == recordBytes", "recordStream != null"})
|
||||
public void setRecordBytes(final byte[] recordBytes) {
|
||||
assert recordBytes != null;
|
||||
|
||||
this.recordBytes = recordBytes;
|
||||
this.recordStream = new ByteArrayInputStream(recordBytes);
|
||||
}
|
||||
|
|
@ -131,7 +135,7 @@ public class BCF2Decoder {
|
|||
}
|
||||
|
||||
public final Object decodeTypedValue(final byte typeDescriptor) {
|
||||
final int size = BCF2Utils.sizeIsOverflow(typeDescriptor) ? decodeVectorSize() : BCF2Utils.decodeSize(typeDescriptor);
|
||||
final int size = decodeNumberOfElements(typeDescriptor);
|
||||
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
||||
|
||||
assert size >= 0;
|
||||
|
|
@ -155,7 +159,7 @@ public class BCF2Decoder {
|
|||
|
||||
public final Object decodeSingleValue(final BCF2Type type) {
|
||||
// TODO -- decodeTypedValue should integrate this routine
|
||||
final int value = BCF2Utils.readInt(type.getSizeInBytes(), recordStream);
|
||||
final int value = decodeInt(type);
|
||||
|
||||
if ( value == type.getMissingBytes() )
|
||||
return null;
|
||||
|
|
@ -184,26 +188,107 @@ public class BCF2Decoder {
|
|||
final byte[] bytes = new byte[size]; // TODO -- in principle should just grab bytes from underlying array
|
||||
try {
|
||||
recordStream.read(bytes);
|
||||
final String s = new String(bytes);
|
||||
return BCF2Utils.isCollapsedString(s) ? BCF2Utils.exploreStringList(s) : s;
|
||||
|
||||
int goodLength = 0;
|
||||
for ( ; goodLength < bytes.length ; goodLength++ )
|
||||
if ( bytes[goodLength] == 0 ) break;
|
||||
|
||||
if ( goodLength == 0 )
|
||||
return null;
|
||||
else {
|
||||
final String s = new String(bytes, 0, goodLength);
|
||||
return BCF2Utils.isCollapsedString(s) ? BCF2Utils.exploreStringList(s) : s;
|
||||
}
|
||||
} catch ( IOException e ) {
|
||||
throw new ReviewedStingException("readByte failure", e);
|
||||
}
|
||||
}
|
||||
|
||||
private final int decodeVectorSize() {
|
||||
final byte typeDescriptor = readTypeDescriptor();
|
||||
final int size = BCF2Utils.decodeSize(typeDescriptor);
|
||||
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
||||
|
||||
assert size == 1;
|
||||
assert type == BCF2Type.INT8 || type == BCF2Type.INT16 || type == BCF2Type.INT32;
|
||||
|
||||
return decodeInt(type.getSizeInBytes());
|
||||
@Ensures("result >= 0")
|
||||
public final int decodeNumberOfElements(final byte typeDescriptor) {
|
||||
if ( BCF2Utils.sizeIsOverflow(typeDescriptor) )
|
||||
// -1 ensures we explode immediately with a bad size if the result is missing
|
||||
return decodeInt(readTypeDescriptor(), -1);
|
||||
else
|
||||
// the size is inline, so just decode it
|
||||
return BCF2Utils.decodeSize(typeDescriptor);
|
||||
}
|
||||
|
||||
public final int decodeInt(int bytesForEachInt) {
|
||||
return BCF2Utils.readInt(bytesForEachInt, recordStream);
|
||||
/**
|
||||
* Decode an int from the stream. If the value in the stream is missing,
|
||||
* returns missingValue. Requires the typeDescriptor indicate an inline
|
||||
* single element event
|
||||
*
|
||||
* @param typeDescriptor
|
||||
* @return
|
||||
*/
|
||||
@Requires("BCF2Utils.decodeSize(typeDescriptor) == 1")
|
||||
public final int decodeInt(final byte typeDescriptor, final int missingValue) {
|
||||
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
||||
final int i = decodeInt(type);
|
||||
return i == type.getMissingBytes() ? missingValue : i;
|
||||
}
|
||||
|
||||
@Requires("type != null")
|
||||
public final int decodeInt(final BCF2Type type) {
|
||||
return BCF2Utils.readInt(type.getSizeInBytes(), recordStream);
|
||||
}
|
||||
|
||||
/**
|
||||
* Low-level reader for int[]
|
||||
*
|
||||
* Requires a typeDescriptor so the function knows how many elements to read,
|
||||
* and how they are encoded.
|
||||
*
|
||||
* If size == 0 => result is null
|
||||
* If size > 0 => result depends on the actual values in the stream
|
||||
* -- If the first element read is MISSING, result is null (all values are missing)
|
||||
* -- Else result = int[N] where N is the first N non-missing values decoded
|
||||
*
|
||||
* @param maybeDest if not null we'll not allocate space for the vector, but instead use
|
||||
* the externally allocated array of ints to store values. If the
|
||||
* size of this vector is < the actual size of the elements, we'll be
|
||||
* forced to use freshly allocated arrays. Also note that padded
|
||||
* int elements are still forced to do a fresh allocation as well.
|
||||
* @return see description
|
||||
*/
|
||||
@Requires({"BCF2Type.INTEGERS.contains(type)", "size >= 0", "type != null"})
|
||||
public final int[] decodeIntArray(final int size, final BCF2Type type, int[] maybeDest) {
|
||||
if ( size == 0 ) {
|
||||
return null;
|
||||
} else {
|
||||
if ( maybeDest != null && maybeDest.length < size )
|
||||
maybeDest = null; // by nulling this out we ensure that we do fresh allocations as maybeDest is too small
|
||||
|
||||
final int val1 = decodeInt(type);
|
||||
if ( val1 == type.getMissingBytes() ) {
|
||||
// fast path for first element being missing
|
||||
for ( int i = 1; i < size; i++ ) decodeInt(type);
|
||||
return null;
|
||||
} else {
|
||||
// we know we will have at least 1 element, so making the int[] is worth it
|
||||
final int[] ints = maybeDest == null ? new int[size] : maybeDest;
|
||||
ints[0] = val1; // we already read the first one
|
||||
for ( int i = 1; i < size; i++ ) {
|
||||
ints[i] = decodeInt(type);
|
||||
if ( ints[i] == type.getMissingBytes() ) {
|
||||
// read the rest of the missing values, dropping them
|
||||
for ( int j = i + 1; j < size; j++ ) decodeInt(type);
|
||||
// deal with auto-pruning by returning an int[] containing
|
||||
// only the non-MISSING values. We do this by copying the first
|
||||
// i elements, as i itself is missing
|
||||
return Arrays.copyOf(ints, i);
|
||||
}
|
||||
}
|
||||
return ints; // all of the elements were non-MISSING
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public final int[] decodeIntArray(final byte typeDescriptor) {
|
||||
final int size = decodeNumberOfElements(typeDescriptor);
|
||||
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
||||
return decodeIntArray(size, type, null);
|
||||
}
|
||||
|
||||
public final double rawFloatToFloat(final int rawFloat) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,282 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* An efficient scheme for building and obtaining specialized
|
||||
* genotype field decoders. Used by the BCFCodec to parse
|
||||
* with little overhead the fields from BCF2 encoded genotype
|
||||
* records
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 6/12
|
||||
*/
|
||||
public class BCF2GenotypeFieldDecoders {
|
||||
final protected static Logger logger = Logger.getLogger(BCF2GenotypeFieldDecoders.class);
|
||||
private final static boolean ENABLE_FASTPATH_GT = true;
|
||||
private final static int MIN_SAMPLES_FOR_FASTPATH_GENOTYPES = 0; // TODO -- update to reasonable number
|
||||
|
||||
// initialized once per writer to allow parallel writers to work
|
||||
private final HashMap<String, Decoder> genotypeFieldDecoder = new HashMap<String, Decoder>();
|
||||
private final Decoder defaultDecoder = new GenericDecoder();
|
||||
|
||||
public BCF2GenotypeFieldDecoders(final VCFHeader header) {
|
||||
// TODO -- fill in appropriate decoders for each FORMAT field in the header
|
||||
|
||||
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_KEY, new GTDecoder());
|
||||
// currently the generic decoder handles FILTER values properly, in so far as we don't tolerate multiple filter field values per genotype
|
||||
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_FILTER_KEY, new FTDecoder());
|
||||
genotypeFieldDecoder.put(VCFConstants.DEPTH_KEY, new DPDecoder());
|
||||
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_ALLELE_DEPTHS, new ADDecoder());
|
||||
genotypeFieldDecoder.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, new PLDecoder());
|
||||
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_QUALITY_KEY, new GQDecoder());
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
//
|
||||
// Genotype field decoder
|
||||
//
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Return decoder appropriate for field, or the generic decoder if no
|
||||
* specialized one is bound
|
||||
* @param field the GT field to decode
|
||||
* @return a non-null decoder
|
||||
*/
|
||||
@Requires("field != null")
|
||||
@Ensures("result != null")
|
||||
public Decoder getDecoder(final String field) {
|
||||
final Decoder d = genotypeFieldDecoder.get(field);
|
||||
return d == null ? defaultDecoder : d;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decoder a field (implicit from creation) encoded as
|
||||
* typeDescriptor in the decoder object in the GenotypeBuilders
|
||||
* one for each sample in order.
|
||||
*
|
||||
* The way this works is that this decode method
|
||||
* iterates over the builders, decoding a genotype field
|
||||
* in BCF2 for each sample from decoder.
|
||||
*
|
||||
* This system allows us to easily use specialized
|
||||
* decoders for specific genotype field values. For example,
|
||||
* we use a special decoder to directly read the BCF2 data for
|
||||
* the PL field into a int[] rather than the generic List of Integer
|
||||
*/
|
||||
public interface Decoder {
|
||||
@Requires({"siteAlleles != null", "! siteAlleles.isEmpty()",
|
||||
"field != null", "decoder != null", "gbs != null", "! gbs.isEmpty()"})
|
||||
public void decode(final List<Allele> siteAlleles,
|
||||
final String field,
|
||||
final BCF2Decoder decoder,
|
||||
final byte typeDescriptor,
|
||||
final List<GenotypeBuilder> gbs);
|
||||
}
|
||||
|
||||
private class GTDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||
// we have to do a bit of low-level processing here as we want to know the size upfronta
|
||||
final int ploidy = decoder.decodeNumberOfElements(typeDescriptor);
|
||||
|
||||
if ( ENABLE_FASTPATH_GT && siteAlleles.size() == 2 && ploidy == 2 && gbs.size() >= MIN_SAMPLES_FOR_FASTPATH_GENOTYPES )
|
||||
fastBiallelicDiploidDecode(siteAlleles, decoder, typeDescriptor, gbs);
|
||||
else {
|
||||
generalDecode(siteAlleles, ploidy, decoder, typeDescriptor, gbs);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* fast path for many samples with diploid genotypes
|
||||
*
|
||||
* The way this would work is simple. Create a List<Allele> diploidGenotypes[] object
|
||||
* After decoding the offset, if that sample is diploid compute the
|
||||
* offset into the alleles vector which is simply offset = allele0 * nAlleles + allele1
|
||||
* if there's a value at diploidGenotypes[offset], use it, otherwise create the genotype
|
||||
* cache it and use that
|
||||
*
|
||||
* Some notes. If there are nAlleles at the site, there are implicitly actually
|
||||
* n + 1 options including
|
||||
*/
|
||||
@Requires("siteAlleles.size() == 2")
|
||||
@SuppressWarnings({"unchecked"})
|
||||
private final void fastBiallelicDiploidDecode(final List<Allele> siteAlleles,
|
||||
final BCF2Decoder decoder,
|
||||
final byte typeDescriptor,
|
||||
final List<GenotypeBuilder> gbs) {
|
||||
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
||||
|
||||
final int nPossibleGenotypes = 3 * 3;
|
||||
final Object allGenotypes[] = new Object[nPossibleGenotypes];
|
||||
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
final int a1 = decoder.decodeInt(type);
|
||||
final int a2 = decoder.decodeInt(type);
|
||||
|
||||
if ( a1 == type.getMissingBytes() ) {
|
||||
assert a2 == type.getMissingBytes();
|
||||
// no called sample GT = .
|
||||
gb.alleles(null);
|
||||
} else if ( a2 == type.getMissingBytes() ) {
|
||||
gb.alleles(Arrays.asList(getAlleleFromEncoded(siteAlleles, a1)));
|
||||
} else {
|
||||
// downshift to remove phase
|
||||
final int offset = (a1 >> 1) * 3 + (a2 >> 1);
|
||||
assert offset < allGenotypes.length;
|
||||
|
||||
// TODO -- how can I get rid of this cast?
|
||||
List<Allele> gt = (List<Allele>)allGenotypes[offset];
|
||||
if ( gt == null ) {
|
||||
final Allele allele1 = getAlleleFromEncoded(siteAlleles, a1);
|
||||
final Allele allele2 = getAlleleFromEncoded(siteAlleles, a2);
|
||||
gt = Arrays.asList(allele1, allele2);
|
||||
allGenotypes[offset] = gt;
|
||||
}
|
||||
|
||||
gb.alleles(gt);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private final void generalDecode(final List<Allele> siteAlleles,
|
||||
final int ploidy,
|
||||
final BCF2Decoder decoder,
|
||||
final byte typeDescriptor,
|
||||
final List<GenotypeBuilder> gbs) {
|
||||
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
||||
|
||||
// a single cache for the encoded genotypes, since we don't actually need this vector
|
||||
final int[] tmp = new int[ploidy];
|
||||
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
final int[] encoded = decoder.decodeIntArray(ploidy, type, tmp);
|
||||
if ( encoded == null )
|
||||
// no called sample GT = .
|
||||
gb.alleles(null);
|
||||
else {
|
||||
assert encoded.length > 0;
|
||||
|
||||
// we have at least some alleles to decode
|
||||
final List<Allele> gt = new ArrayList<Allele>(encoded.length);
|
||||
|
||||
// note that the auto-pruning of fields magically handles different
|
||||
// ploidy per sample at a site
|
||||
for ( final int encode : encoded )
|
||||
gt.add(getAlleleFromEncoded(siteAlleles, encode));
|
||||
|
||||
gb.alleles(gt);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Requires({"siteAlleles != null && ! siteAlleles.isEmpty()", "encode >= 0"})
|
||||
@Ensures("result != null")
|
||||
private final Allele getAlleleFromEncoded(final List<Allele> siteAlleles, final int encode) {
|
||||
final int offset = encode >> 1;
|
||||
return offset == 0 ? Allele.NO_CALL : siteAlleles.get(offset - 1);
|
||||
}
|
||||
}
|
||||
|
||||
private class DPDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
// the -1 is for missing
|
||||
gb.DP(decoder.decodeInt(typeDescriptor, -1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private class GQDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
// the -1 is for missing
|
||||
gb.GQ(decoder.decodeInt(typeDescriptor, -1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private class ADDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
gb.AD(decoder.decodeIntArray(typeDescriptor));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private class PLDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
gb.PL(decoder.decodeIntArray(typeDescriptor));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private class GenericDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
Object value = decoder.decodeTypedValue(typeDescriptor);
|
||||
if ( value != null ) { // don't add missing values
|
||||
if ( value instanceof List && ((List)value).size() == 1) {
|
||||
// todo -- I really hate this, and it suggests that the code isn't completely right
|
||||
// the reason it's here is that it's possible to prune down a vector to a singleton
|
||||
// value and there we have the contract that the value comes back as an atomic value
|
||||
// not a vector of size 1
|
||||
value = ((List)value).get(0);
|
||||
}
|
||||
gb.attribute(field, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private class FTDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
Object value = decoder.decodeTypedValue(typeDescriptor);
|
||||
if ( value != null ) { // don't add missing values
|
||||
gb.filters(value instanceof String ? Collections.singletonList((String)value) : (List<String>)value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,103 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Lazy version of genotypes decoder for BCF2 genotypes
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 5/12
|
||||
*/
|
||||
class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {
|
||||
final protected static Logger logger = Logger.getLogger(BCF2LazyGenotypesDecoder.class);
|
||||
|
||||
// the essential information for us to use to decode the genotypes data
|
||||
// initialized when this lazy decoder is created, as we know all of this from the BCF2Codec
|
||||
// and its stored here again for code cleanliness
|
||||
private final BCF2Codec codec;
|
||||
private final ArrayList<Allele> siteAlleles;
|
||||
private final int nSamples;
|
||||
private final int nFields;
|
||||
|
||||
BCF2LazyGenotypesDecoder(final BCF2Codec codec, final ArrayList<Allele> alleles, final int nSamples, final int nFields) {
|
||||
this.codec = codec;
|
||||
this.siteAlleles = alleles;
|
||||
this.nSamples = nSamples;
|
||||
this.nFields = nFields;
|
||||
}
|
||||
|
||||
@Override
|
||||
public LazyGenotypesContext.LazyData parse(final Object data) {
|
||||
if ( logger.isDebugEnabled() )
|
||||
logger.debug("Decoding BCF genotypes for " + nSamples + " samples with " + nFields + " fields each");
|
||||
|
||||
// load our byte[] data into the decoder
|
||||
final BCF2Decoder decoder = new BCF2Decoder(((BCF2Codec.LazyData)data).bytes);
|
||||
|
||||
// TODO -- fast path for sites only
|
||||
|
||||
// go ahead and decode everyone
|
||||
final List<String> samples = new ArrayList<String>(codec.getHeader().getGenotypeSamples());
|
||||
|
||||
if ( samples.size() != nSamples )
|
||||
throw new UserException.MalformedBCF2("GATK currently doesn't support reading BCF2 files with " +
|
||||
"different numbers of samples per record. Saw " + samples.size() +
|
||||
" samples in header but have a record with " + nSamples + " samples");
|
||||
|
||||
// create and initialize the genotypes array
|
||||
final ArrayList<GenotypeBuilder> builders = new ArrayList<GenotypeBuilder>(nSamples);
|
||||
for ( int i = 0; i < nSamples; i++ ) {
|
||||
builders.add(new GenotypeBuilder(samples.get(i)));
|
||||
}
|
||||
|
||||
for ( int i = 0; i < nFields; i++ ) {
|
||||
// get the field name
|
||||
final int offset = (Integer) decoder.decodeTypedValue();
|
||||
final String field = codec.getDictionaryString(offset);
|
||||
|
||||
// the type of each element
|
||||
final byte typeDescriptor = decoder.readTypeDescriptor();
|
||||
final BCF2GenotypeFieldDecoders.Decoder fieldDecoder = codec.getGenotypeFieldDecoder(field);
|
||||
try {
|
||||
fieldDecoder.decode(siteAlleles, field, decoder, typeDescriptor, builders);
|
||||
} catch ( ClassCastException e ) {
|
||||
throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field
|
||||
+ " inconsistent with the value observed in the decoded value");
|
||||
}
|
||||
}
|
||||
|
||||
final ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nSamples);
|
||||
for ( final GenotypeBuilder gb : builders )
|
||||
genotypes.add(gb.make());
|
||||
|
||||
return new LazyGenotypesContext.LazyData(genotypes, codec.getHeader().getSampleNamesInOrder(), codec.getHeader().getSampleNameToOffset());
|
||||
}
|
||||
}
|
||||
|
|
@ -1,143 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
import org.broad.tribble.FeatureCodecHeader;
|
||||
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.Options;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Testing BCF2
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 2012
|
||||
*/
|
||||
public class BCF2TestWalker extends RodWalker<Integer, Integer> {
|
||||
/**
|
||||
* Variants from this VCF file are used by this tool as input.
|
||||
* The file must at least contain the standard VCF header lines, but
|
||||
* can be empty (i.e., no variants are contained in the file).
|
||||
*/
|
||||
@Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
|
||||
public RodBinding<VariantContext> variants;
|
||||
|
||||
@Argument(doc="keep variants", required=false)
|
||||
public boolean keepVariants = false;
|
||||
|
||||
@Argument(doc="quiet", required=false)
|
||||
public boolean quiet = false;
|
||||
|
||||
@Argument(doc="dontIndexOnTheFly", required=false)
|
||||
public boolean dontIndexOnTheFly = false;
|
||||
|
||||
@Output(doc="File to which results should be written",required=true)
|
||||
protected File bcfFile;
|
||||
|
||||
private final List<VariantContext> vcs = new ArrayList<VariantContext>();
|
||||
protected VariantContextWriter writer;
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
final Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), Collections.singletonList(variants));
|
||||
final VCFHeader header = VCFUtils.withUpdatedContigs(vcfRods.values().iterator().next(), getToolkit());
|
||||
try {
|
||||
EnumSet<Options> options = EnumSet.of(Options.FORCE_BCF);
|
||||
if ( !dontIndexOnTheFly ) options.add(Options.INDEX_ON_THE_FLY);
|
||||
writer = VariantContextWriterFactory.create(bcfFile, new FileOutputStream(bcfFile), getToolkit().getMasterSequenceDictionary(), options);
|
||||
writer.writeHeader(header);
|
||||
} catch ( FileNotFoundException e ) {
|
||||
throw new UserException.CouldNotCreateOutputFile(bcfFile, e);
|
||||
}
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null ) // RodWalkers can make funky map calls
|
||||
return 0;
|
||||
|
||||
for ( VariantContext vc : tracker.getValues(variants, context.getLocation())) {
|
||||
writer.add(vc);
|
||||
if ( keepVariants ) vcs.add(vc);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
//
|
||||
// default reduce -- doesn't do anything at all
|
||||
//
|
||||
public Integer reduceInit() { return 0; }
|
||||
public Integer reduce(Integer counter, Integer sum) { return counter + sum; }
|
||||
|
||||
public void onTraversalDone(Integer sum) {
|
||||
try {
|
||||
writer.close();
|
||||
logger.info("Closed writer");
|
||||
|
||||
// read in the BCF records
|
||||
BCF2Codec codec = new BCF2Codec();
|
||||
PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(bcfFile));
|
||||
FeatureCodecHeader header = codec.readHeader(pbs);
|
||||
pbs.close();
|
||||
|
||||
pbs = new PositionalBufferedStream(new FileInputStream(bcfFile));
|
||||
pbs.skip(header.getHeaderEnd());
|
||||
Iterator<VariantContext> it = vcs.iterator();
|
||||
while ( ! pbs.isDone() ) {
|
||||
if ( keepVariants ) {
|
||||
VariantContext expected = it.next();
|
||||
if ( ! quiet )
|
||||
System.out.printf("vcf = %s %d %s%n", expected.getChr(), expected.getStart(), expected);
|
||||
}
|
||||
VariantContext bcfRaw = codec.decode(pbs);
|
||||
VariantContext bcf = new VariantContextBuilder(bcfRaw).source("variant").make();
|
||||
if ( ! quiet ) {
|
||||
System.out.printf("bcf = %s %d %s%n", bcf.getChr(), bcf.getStart(), bcf.toString());
|
||||
System.out.printf("--------------------------------------------------%n");
|
||||
}
|
||||
}
|
||||
|
||||
} catch ( IOException e ) {
|
||||
throw new UserException.CouldNotCreateOutputFile(bcfFile, "bad user!");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -24,18 +24,22 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
|
||||
import java.util.EnumSet;
|
||||
|
||||
/**
|
||||
* BCF2 types and information
|
||||
* BCF2 types and associated information
|
||||
*
|
||||
* @author depristo
|
||||
* @since 05/12
|
||||
*/
|
||||
public enum BCF2Type {
|
||||
INT8(1, 1, BCF2Utils.INT8_MISSING_VALUE, -127, 127), // todo -- confirm range
|
||||
INT16(2, 2, BCF2Utils.INT16_MISSING_VALUE, -32767, 32767),
|
||||
INT32(3, 4, BCF2Utils.INT32_MISSING_VALUE, -2147483647, 2147483647),
|
||||
FLOAT(5, 4, BCF2Utils.FLOAT_MISSING_VALUE),
|
||||
CHAR(7);
|
||||
INT8 (1, 1, 0xFFFFFF80, -127, 127), // todo -- confirm range
|
||||
INT16(2, 2, 0xFFFF8000, -32767, 32767),
|
||||
INT32(3, 4, 0x80000000, -2147483647, 2147483647),
|
||||
FLOAT(5, 4, 0x7F800001),
|
||||
CHAR (7, 1, 0x00000000);
|
||||
|
||||
private final int id;
|
||||
private final Object missingJavaValue;
|
||||
|
|
@ -60,11 +64,53 @@ public enum BCF2Type {
|
|||
this.maxValue = maxValue;
|
||||
}
|
||||
|
||||
/**
|
||||
* How many bytes are used to represent this type on disk?
|
||||
* @return
|
||||
*/
|
||||
public int getSizeInBytes() {
|
||||
return sizeInBytes;
|
||||
}
|
||||
|
||||
/**
|
||||
* The ID according to the BCF2 specification
|
||||
* @return
|
||||
*/
|
||||
public int getID() { return id; }
|
||||
|
||||
/**
|
||||
* Can we encode value v in this type, according to its declared range.
|
||||
*
|
||||
* Only makes sense for integer values
|
||||
*
|
||||
* @param v
|
||||
* @return
|
||||
*/
|
||||
@Requires("INTEGERS.contains(this)")
|
||||
public final boolean withinRange(final long v) { return v >= minValue && v <= maxValue; }
|
||||
|
||||
/**
|
||||
* Return the java object (aka null) that is used to represent a missing value for this
|
||||
* type in Java
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public Object getMissingJavaValue() { return missingJavaValue; }
|
||||
|
||||
/**
|
||||
* The bytes (encoded as an int) that are used to represent a missing value
|
||||
* for this type in BCF2
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public int getMissingBytes() { return missingBytes; }
|
||||
|
||||
/**
|
||||
* An enum set of the types that might represent Integer values
|
||||
*/
|
||||
public final static EnumSet<BCF2Type> INTEGERS = EnumSet.of(INT8, INT16, INT32);
|
||||
|
||||
public boolean isIntegerType() {
|
||||
return INTEGERS.contains(this);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,6 +24,8 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||
|
|
@ -33,9 +35,8 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.io.OutputStream;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Common utilities for working with BCF2 files
|
||||
|
|
@ -45,7 +46,7 @@ import java.util.List;
|
|||
* @author depristo
|
||||
* @since 5/12
|
||||
*/
|
||||
public class BCF2Utils {
|
||||
public final class BCF2Utils {
|
||||
public static final byte[] MAGIC_HEADER_LINE = "BCF\2".getBytes();
|
||||
|
||||
public static final int MAX_ALLELES_IN_GENOTYPES = 127;
|
||||
|
|
@ -53,12 +54,6 @@ public class BCF2Utils {
|
|||
public static final int OVERFLOW_ELEMENT_MARKER = 15;
|
||||
public static final int MAX_INLINE_ELEMENTS = 14;
|
||||
|
||||
// Note that these values are prefixed by FFFFFF for convenience
|
||||
public static final int INT8_MISSING_VALUE = 0xFFFFFF80;
|
||||
public static final int INT16_MISSING_VALUE = 0xFFFF8000;
|
||||
public static final int INT32_MISSING_VALUE = 0x80000000;
|
||||
public static final int FLOAT_MISSING_VALUE = 0x7F800001;
|
||||
|
||||
public final static BCF2Type[] INTEGER_TYPES_BY_SIZE = new BCF2Type[]{BCF2Type.INT8, BCF2Type.INT16, BCF2Type.INT32};
|
||||
public final static BCF2Type[] ID_TO_ENUM;
|
||||
|
||||
|
|
@ -77,11 +72,17 @@ public class BCF2Utils {
|
|||
* The dictionary is an ordered list of common VCF identifiers (FILTER, INFO, and FORMAT)
|
||||
* fields.
|
||||
*
|
||||
* Note that it's critical that the list be deduplicated and sorted in a consistent manner each time,
|
||||
* as the BCF2 offsets are encoded relative to this dictionary, and if it isn't determined exactly
|
||||
* the same way as in the header each time it's very bad
|
||||
*
|
||||
* @param header the VCFHeader from which to build the dictionary
|
||||
* @return a non-null dictionary of elements, may be empty
|
||||
*/
|
||||
@Requires("header != null")
|
||||
@Ensures({"result != null", "new HashSet(result).size() == result.size()"})
|
||||
public final static ArrayList<String> makeDictionary(final VCFHeader header) {
|
||||
final ArrayList<String> dict = new ArrayList<String>();
|
||||
final Set<String> dict = new TreeSet<String>();
|
||||
|
||||
// set up the strings dictionary
|
||||
dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field
|
||||
|
|
@ -92,23 +93,27 @@ public class BCF2Utils {
|
|||
}
|
||||
}
|
||||
|
||||
return dict;
|
||||
return new ArrayList<String>(dict);
|
||||
}
|
||||
|
||||
@Requires({"nElements >= 0", "type != null"})
|
||||
public final static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) {
|
||||
int encodeSize = Math.min(nElements, OVERFLOW_ELEMENT_MARKER);
|
||||
byte typeByte = (byte)((0x0F & encodeSize) << 4 | (type.getID() & 0x0F));
|
||||
return typeByte;
|
||||
}
|
||||
|
||||
@Ensures("result >= 0")
|
||||
public final static int decodeSize(final byte typeDescriptor) {
|
||||
return (0xF0 & typeDescriptor) >> 4;
|
||||
}
|
||||
|
||||
@Ensures("result >= 0")
|
||||
public final static int decodeTypeID(final byte typeDescriptor) {
|
||||
return typeDescriptor & 0x0F;
|
||||
}
|
||||
|
||||
@Ensures("result != null")
|
||||
public final static BCF2Type decodeType(final byte typeDescriptor) {
|
||||
return ID_TO_ENUM[decodeTypeID(typeDescriptor)];
|
||||
}
|
||||
|
|
@ -117,6 +122,7 @@ public class BCF2Utils {
|
|||
return decodeSize(typeDescriptor) == OVERFLOW_ELEMENT_MARKER;
|
||||
}
|
||||
|
||||
@Requires("nElements >= 0")
|
||||
public final static boolean willOverflow(final long nElements) {
|
||||
return nElements > MAX_INLINE_ELEMENTS;
|
||||
}
|
||||
|
|
@ -128,6 +134,7 @@ public class BCF2Utils {
|
|||
}
|
||||
|
||||
public final static byte readByte(final InputStream stream) {
|
||||
// TODO -- shouldn't be capturing error here
|
||||
try {
|
||||
return (byte)(stream.read() & 0xFF);
|
||||
} catch ( IOException e ) {
|
||||
|
|
@ -135,6 +142,7 @@ public class BCF2Utils {
|
|||
}
|
||||
}
|
||||
|
||||
@Requires({"stream != null", "bytesForEachInt > 0"})
|
||||
public final static int readInt(int bytesForEachInt, final InputStream stream) {
|
||||
switch ( bytesForEachInt ) {
|
||||
case 1: {
|
||||
|
|
@ -161,10 +169,10 @@ public class BCF2Utils {
|
|||
* @param strings size > 1 list of strings
|
||||
* @return
|
||||
*/
|
||||
@Requires({"strings != null", "strings.size() > 1"})
|
||||
@Ensures("result != null")
|
||||
public static final String collapseStringList(final List<String> strings) {
|
||||
assert strings.size() > 1;
|
||||
|
||||
StringBuilder b = new StringBuilder();
|
||||
final StringBuilder b = new StringBuilder();
|
||||
for ( final String s : strings ) {
|
||||
assert s.indexOf(",") == -1; // no commas in individual strings
|
||||
b.append(",").append(s);
|
||||
|
|
@ -181,12 +189,15 @@ public class BCF2Utils {
|
|||
* @param collapsed
|
||||
* @return
|
||||
*/
|
||||
@Requires({"collapsed != null", "isCollapsedString(collapsed)"})
|
||||
@Ensures("result != null")
|
||||
public static final List<String> exploreStringList(final String collapsed) {
|
||||
assert isCollapsedString(collapsed);
|
||||
final String[] exploded = collapsed.substring(1).split(",");
|
||||
return Arrays.asList(exploded);
|
||||
}
|
||||
|
||||
@Requires("s != null")
|
||||
public static final boolean isCollapsedString(final String s) {
|
||||
return s.charAt(0) == ',';
|
||||
}
|
||||
|
|
@ -200,6 +211,8 @@ public class BCF2Utils {
|
|||
* @param vcfFile
|
||||
* @return
|
||||
*/
|
||||
@Requires("vcfFile != null")
|
||||
@Ensures("result != null")
|
||||
public static final File shadowBCF(final File vcfFile) {
|
||||
final String path = vcfFile.getAbsolutePath();
|
||||
if ( path.contains(".vcf") )
|
||||
|
|
@ -207,4 +220,109 @@ public class BCF2Utils {
|
|||
else
|
||||
return new File( path + ".bcf" );
|
||||
}
|
||||
|
||||
@Ensures("BCF2Type.INTEGERS.contains(result)")
|
||||
public final static BCF2Type determineIntegerType(final int value) {
|
||||
for ( final BCF2Type potentialType : INTEGER_TYPES_BY_SIZE) {
|
||||
if ( potentialType.withinRange(value) )
|
||||
return potentialType;
|
||||
}
|
||||
|
||||
throw new ReviewedStingException("Integer cannot be encoded in allowable range of even INT32: " + value);
|
||||
}
|
||||
|
||||
@Ensures("BCF2Type.INTEGERS.contains(result)")
|
||||
public final static BCF2Type determineIntegerType(final int[] values) {
|
||||
// literally a copy of the code below, but there's no general way to unify lists and arrays in java
|
||||
BCF2Type maxType = BCF2Type.INT8;
|
||||
for ( final int value : values ) {
|
||||
final BCF2Type type1 = determineIntegerType(value);
|
||||
switch ( type1 ) {
|
||||
case INT8: break;
|
||||
case INT16: maxType = BCF2Type.INT16; break;
|
||||
case INT32: return BCF2Type.INT32; // fast path for largest possible value
|
||||
default: throw new ReviewedStingException("Unexpected integer type " + type1 );
|
||||
}
|
||||
}
|
||||
return maxType;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the maximum BCF2 integer size of t1 and t2
|
||||
*
|
||||
* For example, if t1 == INT8 and t2 == INT16 returns INT16
|
||||
*
|
||||
* @param t1
|
||||
* @param t2
|
||||
* @return
|
||||
*/
|
||||
@Requires({"BCF2Type.INTEGERS.contains(t1)","BCF2Type.INTEGERS.contains(t2)"})
|
||||
@Ensures("BCF2Type.INTEGERS.contains(result)")
|
||||
public final static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) {
|
||||
switch ( t1 ) {
|
||||
case INT8: return t2;
|
||||
case INT16: return t2 == BCF2Type.INT32 ? t2 : t1;
|
||||
case INT32: return t1;
|
||||
default: throw new ReviewedStingException("BUG: unexpected BCF2Type " + t1);
|
||||
}
|
||||
}
|
||||
|
||||
@Ensures("BCF2Type.INTEGERS.contains(result)")
|
||||
public final static BCF2Type determineIntegerType(final List<Integer> values) {
|
||||
BCF2Type maxType = BCF2Type.INT8;
|
||||
for ( final int value : values ) {
|
||||
final BCF2Type type1 = determineIntegerType(value);
|
||||
switch ( type1 ) {
|
||||
case INT8: break;
|
||||
case INT16: maxType = BCF2Type.INT16; break;
|
||||
case INT32: return BCF2Type.INT32; // fast path for largest possible value
|
||||
default: throw new ReviewedStingException("Unexpected integer type " + type1 );
|
||||
}
|
||||
}
|
||||
return maxType;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function that takes an object and returns a list representation
|
||||
* of it:
|
||||
*
|
||||
* o == null => []
|
||||
* o is a list => o
|
||||
* else => [o]
|
||||
*
|
||||
* @param o
|
||||
* @return
|
||||
*/
|
||||
public final static List<Object> toList(final Object o) {
|
||||
if ( o == null ) return Collections.emptyList();
|
||||
else if ( o instanceof List ) return (List<Object>)o;
|
||||
else return Collections.singletonList(o);
|
||||
}
|
||||
|
||||
public final static void encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException {
|
||||
switch ( type.getSizeInBytes() ) {
|
||||
case 1:
|
||||
encodeStream.write(0xFF & value);
|
||||
break;
|
||||
case 2:
|
||||
encodeStream.write((0xFF00 & value) >> 8);
|
||||
encodeStream.write(0xFF & value);
|
||||
break;
|
||||
case 4:
|
||||
encodeStream.write((0xFF000000 & value) >> 24);
|
||||
encodeStream.write((0x00FF0000 & value) >> 16);
|
||||
encodeStream.write((0x0000FF00 & value) >> 8);
|
||||
encodeStream.write((0x000000FF & value));
|
||||
break;
|
||||
default:
|
||||
throw new ReviewedStingException("BUG: unexpected type size " + type);
|
||||
}
|
||||
// general case for reference
|
||||
// for ( int i = type.getSizeInBytes() - 1; i >= 0; i-- ) {
|
||||
// final int shift = i * 8;
|
||||
// int mask = 0xFF << shift;
|
||||
// int byteValue = (mask & value) >> shift;
|
||||
// encodeStream.write(byteValue);
|
||||
// }
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
|
||||
// we have to store the list of strings that make up the header until they're needed
|
||||
protected VCFHeader header = null;
|
||||
protected VCFHeaderVersion version = null;
|
||||
|
||||
// a mapping of the allele
|
||||
protected Map<String, List<Allele>> alleleMap = new HashMap<String, List<Allele>>(3);
|
||||
|
|
@ -48,7 +49,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
protected final String[] locParts = new String[6];
|
||||
|
||||
// for performance we cache the hashmap of filter encodings for quick lookup
|
||||
protected HashMap<String,LinkedHashSet<String>> filterHash = new HashMap<String,LinkedHashSet<String>>();
|
||||
protected HashMap<String,List<String>> filterHash = new HashMap<String,List<String>>();
|
||||
|
||||
// we store a name to give to each of the variant contexts we emit
|
||||
protected String name = "Unknown";
|
||||
|
|
@ -91,24 +92,12 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
*/
|
||||
public abstract Object readHeader(LineReader reader);
|
||||
|
||||
/**
|
||||
* create a genotype map
|
||||
*
|
||||
* @param str the string
|
||||
* @param alleles the list of alleles
|
||||
* @param chr chrom
|
||||
* @param pos position
|
||||
* @return a mapping of sample name to genotype object
|
||||
*/
|
||||
public abstract LazyGenotypesContext.LazyData createGenotypeMap(String str, List<Allele> alleles, String chr, int pos);
|
||||
|
||||
|
||||
/**
|
||||
* parse the filter string, first checking to see if we already have parsed it in a previous attempt
|
||||
* @param filterString the string to parse
|
||||
* @return a set of the filters applied
|
||||
*/
|
||||
protected abstract Set<String> parseFilters(String filterString);
|
||||
protected abstract List<String> parseFilters(String filterString);
|
||||
|
||||
/**
|
||||
* create a VCF header from a set of header record lines
|
||||
|
|
@ -117,6 +106,8 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
* @return a VCFHeader object
|
||||
*/
|
||||
protected VCFHeader parseHeaderFromLines( final List<String> headerStrings, final VCFHeaderVersion version ) {
|
||||
this.version = version;
|
||||
|
||||
Set<VCFHeaderLine> metaData = new TreeSet<VCFHeaderLine>();
|
||||
Set<String> sampleNames = new LinkedHashSet<String>();
|
||||
int contigCounter = 0;
|
||||
|
|
@ -320,7 +311,9 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
String ref = getCachedString(parts[3].toUpperCase());
|
||||
String alts = getCachedString(parts[4].toUpperCase());
|
||||
builder.log10PError(parseQual(parts[5]));
|
||||
builder.filters(parseFilters(getCachedString(parts[6])));
|
||||
|
||||
final List<String> filters = parseFilters(getCachedString(parts[6]));
|
||||
if ( filters != null ) builder.filters(new HashSet<String>(filters));
|
||||
final Map<String, Object> attrs = parseInfo(parts[7]);
|
||||
builder.attributes(attrs);
|
||||
|
||||
|
|
@ -719,4 +712,115 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
try { stream.close(); } catch ( IOException e ) {}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* create a genotype map
|
||||
*
|
||||
* @param str the string
|
||||
* @param alleles the list of alleles
|
||||
* @return a mapping of sample name to genotype object
|
||||
*/
|
||||
public LazyGenotypesContext.LazyData createGenotypeMap(final String str,
|
||||
final List<Allele> alleles,
|
||||
final String chr,
|
||||
final int pos) {
|
||||
if (genotypeParts == null)
|
||||
genotypeParts = new String[header.getColumnCount() - NUM_STANDARD_FIELDS];
|
||||
|
||||
int nParts = ParsingUtils.split(str, genotypeParts, VCFConstants.FIELD_SEPARATOR_CHAR);
|
||||
if ( nParts != genotypeParts.length )
|
||||
generateException("there are " + (nParts-1) + " genotypes while the header requires that " + (genotypeParts.length-1) + " genotypes be present for all records", lineNo);
|
||||
|
||||
ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nParts);
|
||||
|
||||
// get the format keys
|
||||
int nGTKeys = ParsingUtils.split(genotypeParts[0], genotypeKeyArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
|
||||
|
||||
// cycle through the sample names
|
||||
Iterator<String> sampleNameIterator = header.getGenotypeSamples().iterator();
|
||||
|
||||
// clear out our allele mapping
|
||||
alleleMap.clear();
|
||||
|
||||
// cycle through the genotype strings
|
||||
for (int genotypeOffset = 1; genotypeOffset < nParts; genotypeOffset++) {
|
||||
int GTValueSplitSize = ParsingUtils.split(genotypeParts[genotypeOffset], GTValueArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
|
||||
|
||||
final String sampleName = sampleNameIterator.next();
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(sampleName);
|
||||
|
||||
// check to see if the value list is longer than the key list, which is a problem
|
||||
if (nGTKeys < GTValueSplitSize)
|
||||
generateException("There are too many keys for the sample " + sampleName + ", keys = " + parts[8] + ", values = " + parts[genotypeOffset]);
|
||||
|
||||
int genotypeAlleleLocation = -1;
|
||||
if (nGTKeys >= 1) {
|
||||
gb.maxAttributes(nGTKeys - 1);
|
||||
|
||||
for (int i = 0; i < nGTKeys; i++) {
|
||||
final String gtKey = genotypeKeyArray[i];
|
||||
boolean missing = i >= GTValueSplitSize;
|
||||
|
||||
// todo -- all of these on the fly parsing of the missing value should be static constants
|
||||
if (gtKey.equals(VCFConstants.GENOTYPE_KEY)) {
|
||||
genotypeAlleleLocation = i;
|
||||
} else if ( missing ) {
|
||||
// if its truly missing (there no provided value) skip adding it to the attributes
|
||||
} else if (gtKey.equals(VCFConstants.GENOTYPE_FILTER_KEY)) {
|
||||
final List<String> filters = parseFilters(getCachedString(GTValueArray[i]));
|
||||
if ( filters != null ) gb.filters(filters);
|
||||
} else if ( GTValueArray[i].equals(VCFConstants.MISSING_VALUE_v4) ) {
|
||||
// don't add missing values to the map
|
||||
} else {
|
||||
if (gtKey.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
|
||||
if ( GTValueArray[i].equals(VCFConstants.MISSING_GENOTYPE_QUALITY_v3) )
|
||||
gb.noGQ();
|
||||
else
|
||||
gb.GQ((int)Math.round(Double.valueOf(GTValueArray[i])));
|
||||
} else if (gtKey.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS)) {
|
||||
gb.AD(decodeInts(GTValueArray[i]));
|
||||
} else if (gtKey.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY)) {
|
||||
gb.PL(decodeInts(GTValueArray[i]));
|
||||
} else if (gtKey.equals(VCFConstants.GENOTYPE_LIKELIHOODS_KEY)) {
|
||||
gb.PL(GenotypeLikelihoods.fromGLField(GTValueArray[i]).getAsPLs());
|
||||
} else if (gtKey.equals(VCFConstants.DEPTH_KEY)) {
|
||||
gb.DP(Integer.valueOf(GTValueArray[i]));
|
||||
} else {
|
||||
gb.attribute(gtKey, GTValueArray[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// check to make sure we found a genotype field if our version is less than 4.1 file
|
||||
if ( version != VCFHeaderVersion.VCF4_1 && genotypeAlleleLocation == -1 )
|
||||
generateException("Unable to find the GT field for the record; the GT field is required in VCF4.0");
|
||||
if ( genotypeAlleleLocation > 0 )
|
||||
generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes when present");
|
||||
|
||||
final List<Allele> GTalleles = (genotypeAlleleLocation == -1 ? new ArrayList<Allele>(0) : parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap));
|
||||
gb.alleles(GTalleles);
|
||||
gb.phased(genotypeAlleleLocation != -1 && GTValueArray[genotypeAlleleLocation].indexOf(VCFConstants.PHASED) != -1);
|
||||
|
||||
// add it to the list
|
||||
try {
|
||||
genotypes.add(gb.make());
|
||||
} catch (TribbleException e) {
|
||||
throw new TribbleException.InternalCodecException(e.getMessage() + ", at position " + chr+":"+pos);
|
||||
}
|
||||
}
|
||||
|
||||
return new LazyGenotypesContext.LazyData(genotypes, header.getSampleNamesInOrder(), header.getSampleNameToOffset());
|
||||
}
|
||||
|
||||
|
||||
private final static String[] INT_DECODE_ARRAY = new String[10000];
|
||||
private final static int[] decodeInts(final String string) {
|
||||
final int nValues = ParsingUtils.split(string, INT_DECODE_ARRAY, ',');
|
||||
final int[] values = new int[nValues];
|
||||
for ( int i = 0; i < nValues; i++ )
|
||||
values[i] = Integer.valueOf(INT_DECODE_ARRAY[i]);
|
||||
return values;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,27 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
import org.broad.tribble.TribbleException;
|
||||
|
|
@ -78,24 +102,24 @@ public class VCF3Codec extends AbstractVCFCodec {
|
|||
* @param filterString the string to parse
|
||||
* @return a set of the filters applied
|
||||
*/
|
||||
protected Set<String> parseFilters(String filterString) {
|
||||
protected List<String> parseFilters(String filterString) {
|
||||
|
||||
// null for unfiltered
|
||||
if ( filterString.equals(VCFConstants.UNFILTERED) )
|
||||
return null;
|
||||
|
||||
// empty set for passes filters
|
||||
LinkedHashSet<String> fFields = new LinkedHashSet<String>();
|
||||
List<String> fFields = new ArrayList<String>();
|
||||
|
||||
if ( filterString.equals(VCFConstants.PASSES_FILTERS_v3) )
|
||||
return fFields;
|
||||
return new ArrayList<String>(fFields);
|
||||
|
||||
if ( filterString.length() == 0 )
|
||||
generateException("The VCF specification requires a valid filter status");
|
||||
|
||||
// do we have the filter string cached?
|
||||
if ( filterHash.containsKey(filterString) )
|
||||
return filterHash.get(filterString);
|
||||
return new ArrayList<String>(filterHash.get(filterString));
|
||||
|
||||
// otherwise we have to parse and cache the value
|
||||
if ( filterString.indexOf(VCFConstants.FILTER_CODE_SEPARATOR) == -1 )
|
||||
|
|
@ -108,93 +132,6 @@ public class VCF3Codec extends AbstractVCFCodec {
|
|||
return fFields;
|
||||
}
|
||||
|
||||
/**
|
||||
* create a genotype map
|
||||
*
|
||||
* @param str the string
|
||||
* @param alleles the list of alleles
|
||||
* @param chr chrom
|
||||
* @param pos position
|
||||
* @return a mapping of sample name to genotype object
|
||||
*/
|
||||
public LazyGenotypesContext.LazyData createGenotypeMap(String str, List<Allele> alleles, String chr, int pos) {
|
||||
if (genotypeParts == null)
|
||||
genotypeParts = new String[header.getColumnCount() - NUM_STANDARD_FIELDS];
|
||||
|
||||
int nParts = ParsingUtils.split(str, genotypeParts, VCFConstants.FIELD_SEPARATOR_CHAR);
|
||||
if ( nParts != genotypeParts.length )
|
||||
generateException("there are " + (nParts-1) + " genotypes while the header requires that " + (genotypeParts.length-1) + " genotypes be present for all records", lineNo);
|
||||
|
||||
ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nParts);
|
||||
|
||||
// get the format keys
|
||||
int nGTKeys = ParsingUtils.split(genotypeParts[0], genotypeKeyArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
|
||||
|
||||
// cycle through the sample names
|
||||
Iterator<String> sampleNameIterator = header.getGenotypeSamples().iterator();
|
||||
|
||||
// clear out our allele mapping
|
||||
alleleMap.clear();
|
||||
|
||||
// cycle through the genotype strings
|
||||
for (int genotypeOffset = 1; genotypeOffset < nParts; genotypeOffset++) {
|
||||
int GTValueSplitSize = ParsingUtils.split(genotypeParts[genotypeOffset], GTValueArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
|
||||
|
||||
double GTQual = VariantContext.NO_LOG10_PERROR;
|
||||
Set<String> genotypeFilters = null;
|
||||
Map<String, Object> gtAttributes = null;
|
||||
String sampleName = sampleNameIterator.next();
|
||||
|
||||
// check to see if the value list is longer than the key list, which is a problem
|
||||
if (nGTKeys < GTValueSplitSize)
|
||||
generateException("There are too many keys for the sample " + sampleName + ", keys = " + parts[8] + ", values = " + parts[genotypeOffset]);
|
||||
|
||||
int genotypeAlleleLocation = -1;
|
||||
if (nGTKeys >= 1) {
|
||||
gtAttributes = new HashMap<String, Object>(nGTKeys - 1);
|
||||
|
||||
for (int i = 0; i < nGTKeys; i++) {
|
||||
final String gtKey = new String(genotypeKeyArray[i]);
|
||||
boolean missing = i >= GTValueSplitSize;
|
||||
|
||||
if (gtKey.equals(VCFConstants.GENOTYPE_KEY)) {
|
||||
genotypeAlleleLocation = i;
|
||||
} else if (gtKey.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
|
||||
GTQual = missing ? parseQual(VCFConstants.MISSING_VALUE_v4) : parseQual(GTValueArray[i]);
|
||||
} else if (gtKey.equals(VCFConstants.GENOTYPE_FILTER_KEY)) {
|
||||
genotypeFilters = missing ? parseFilters(VCFConstants.MISSING_VALUE_v4) : parseFilters(getCachedString(GTValueArray[i]));
|
||||
} else if ( missing || GTValueArray[i].equals(VCFConstants.MISSING_GENOTYPE_QUALITY_v3) ) {
|
||||
gtAttributes.put(gtKey, VCFConstants.MISSING_VALUE_v4);
|
||||
} else {
|
||||
gtAttributes.put(gtKey, new String(GTValueArray[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// check to make sure we found a genotype field
|
||||
if ( genotypeAlleleLocation < 0 )
|
||||
generateException("Unable to find the GT field for the record; the GT field is required");
|
||||
if ( genotypeAlleleLocation > 0 )
|
||||
generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes");
|
||||
|
||||
boolean phased = GTValueArray[genotypeAlleleLocation].indexOf(VCFConstants.PHASED) != -1;
|
||||
|
||||
// add it to the list
|
||||
try {
|
||||
genotypes.add(new Genotype(sampleName,
|
||||
parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap),
|
||||
GTQual,
|
||||
genotypeFilters,
|
||||
gtAttributes,
|
||||
phased));
|
||||
} catch (TribbleException e) {
|
||||
throw new TribbleException.InternalCodecException(e.getMessage() + ", at position " + chr+":"+pos);
|
||||
}
|
||||
}
|
||||
|
||||
return new LazyGenotypesContext.LazyData(genotypes, header.sampleNamesInOrder, header.sampleNameToOffset);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canDecode(final String potentialInput) {
|
||||
return canDecodeFile(potentialInput, VCF3_MAGIC_HEADER);
|
||||
|
|
|
|||
|
|
@ -48,7 +48,6 @@ import java.util.*;
|
|||
public class VCFCodec extends AbstractVCFCodec {
|
||||
// Our aim is to read in the records and convert to VariantContext as quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters.
|
||||
public final static String VCF4_MAGIC_HEADER = "##fileformat=VCFv4";
|
||||
private VCFHeaderVersion version = null;
|
||||
|
||||
/**
|
||||
* A VCF header that contains master info/filter/format records that we use to 'fill in'
|
||||
|
|
@ -127,121 +126,33 @@ public class VCFCodec extends AbstractVCFCodec {
|
|||
* @param filterString the string to parse
|
||||
* @return a set of the filters applied or null if filters were not applied to the record (e.g. as per the missing value in a VCF)
|
||||
*/
|
||||
protected Set<String> parseFilters(String filterString) {
|
||||
return parseFilters(filterHash, lineNo, filterString);
|
||||
}
|
||||
|
||||
public static Set<String> parseFilters(final Map<String, LinkedHashSet<String>> cache, final int lineNo, final String filterString) {
|
||||
protected List<String> parseFilters(String filterString) {
|
||||
// null for unfiltered
|
||||
if ( filterString.equals(VCFConstants.UNFILTERED) )
|
||||
return null;
|
||||
|
||||
if ( filterString.equals(VCFConstants.PASSES_FILTERS_v4) )
|
||||
return Collections.emptySet();
|
||||
return Collections.emptyList();
|
||||
if ( filterString.equals(VCFConstants.PASSES_FILTERS_v3) )
|
||||
generateException(VCFConstants.PASSES_FILTERS_v3 + " is an invalid filter name in vcf4", lineNo);
|
||||
if ( filterString.length() == 0 )
|
||||
generateException("The VCF specification requires a valid filter status: filter was " + filterString, lineNo);
|
||||
|
||||
// do we have the filter string cached?
|
||||
if ( cache != null && cache.containsKey(filterString) )
|
||||
return Collections.unmodifiableSet(cache.get(filterString));
|
||||
if ( filterHash.containsKey(filterString) )
|
||||
return filterHash.get(filterString);
|
||||
|
||||
// empty set for passes filters
|
||||
LinkedHashSet<String> fFields = new LinkedHashSet<String>();
|
||||
List<String> fFields = new LinkedList<String>();
|
||||
// otherwise we have to parse and cache the value
|
||||
if ( filterString.indexOf(VCFConstants.FILTER_CODE_SEPARATOR) == -1 )
|
||||
fFields.add(filterString);
|
||||
else
|
||||
fFields.addAll(Arrays.asList(filterString.split(VCFConstants.FILTER_CODE_SEPARATOR)));
|
||||
|
||||
fFields = fFields;
|
||||
if ( cache != null ) cache.put(filterString, fFields);
|
||||
filterHash.put(filterString, Collections.unmodifiableList(fFields));
|
||||
|
||||
return Collections.unmodifiableSet(fFields);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* create a genotype map
|
||||
*
|
||||
* @param str the string
|
||||
* @param alleles the list of alleles
|
||||
* @return a mapping of sample name to genotype object
|
||||
*/
|
||||
public LazyGenotypesContext.LazyData createGenotypeMap(String str, List<Allele> alleles, String chr, int pos) {
|
||||
if (genotypeParts == null)
|
||||
genotypeParts = new String[header.getColumnCount() - NUM_STANDARD_FIELDS];
|
||||
|
||||
int nParts = ParsingUtils.split(str, genotypeParts, VCFConstants.FIELD_SEPARATOR_CHAR);
|
||||
if ( nParts != genotypeParts.length )
|
||||
generateException("there are " + (nParts-1) + " genotypes while the header requires that " + (genotypeParts.length-1) + " genotypes be present for all records", lineNo);
|
||||
|
||||
ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nParts);
|
||||
|
||||
// get the format keys
|
||||
int nGTKeys = ParsingUtils.split(genotypeParts[0], genotypeKeyArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
|
||||
|
||||
// cycle through the sample names
|
||||
Iterator<String> sampleNameIterator = header.getGenotypeSamples().iterator();
|
||||
|
||||
// clear out our allele mapping
|
||||
alleleMap.clear();
|
||||
|
||||
// cycle through the genotype strings
|
||||
for (int genotypeOffset = 1; genotypeOffset < nParts; genotypeOffset++) {
|
||||
int GTValueSplitSize = ParsingUtils.split(genotypeParts[genotypeOffset], GTValueArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
|
||||
|
||||
double GTQual = VariantContext.NO_LOG10_PERROR;
|
||||
Set<String> genotypeFilters = null;
|
||||
Map<String, Object> gtAttributes = null;
|
||||
String sampleName = sampleNameIterator.next();
|
||||
|
||||
// check to see if the value list is longer than the key list, which is a problem
|
||||
if (nGTKeys < GTValueSplitSize)
|
||||
generateException("There are too many keys for the sample " + sampleName + ", keys = " + parts[8] + ", values = " + parts[genotypeOffset]);
|
||||
|
||||
int genotypeAlleleLocation = -1;
|
||||
if (nGTKeys >= 1) {
|
||||
gtAttributes = new HashMap<String, Object>(nGTKeys - 1);
|
||||
|
||||
for (int i = 0; i < nGTKeys; i++) {
|
||||
final String gtKey = new String(genotypeKeyArray[i]);
|
||||
boolean missing = i >= GTValueSplitSize;
|
||||
|
||||
// todo -- all of these on the fly parsing of the missing value should be static constants
|
||||
if (gtKey.equals(VCFConstants.GENOTYPE_KEY)) {
|
||||
genotypeAlleleLocation = i;
|
||||
} else if (gtKey.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
|
||||
GTQual = missing ? parseQual(VCFConstants.MISSING_VALUE_v4) : parseQual(GTValueArray[i]);
|
||||
} else if (gtKey.equals(VCFConstants.GENOTYPE_FILTER_KEY)) {
|
||||
genotypeFilters = missing ? parseFilters(VCFConstants.MISSING_VALUE_v4) : parseFilters(getCachedString(GTValueArray[i]));
|
||||
} else if ( missing ) {
|
||||
// if its truly missing (there no provided value) skip adding it to the attributes
|
||||
} else {
|
||||
gtAttributes.put(gtKey, GTValueArray[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// check to make sure we found a genotype field if we are a VCF4.0 file
|
||||
if ( version == VCFHeaderVersion.VCF4_0 && genotypeAlleleLocation == -1 )
|
||||
generateException("Unable to find the GT field for the record; the GT field is required in VCF4.0");
|
||||
if ( genotypeAlleleLocation > 0 )
|
||||
generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes when present");
|
||||
|
||||
List<Allele> GTalleles = (genotypeAlleleLocation == -1 ? new ArrayList<Allele>(0) : parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap));
|
||||
boolean phased = genotypeAlleleLocation != -1 && GTValueArray[genotypeAlleleLocation].indexOf(VCFConstants.PHASED) != -1;
|
||||
|
||||
// add it to the list
|
||||
try {
|
||||
genotypes.add(new Genotype(sampleName, GTalleles, GTQual, genotypeFilters, gtAttributes, phased));
|
||||
} catch (TribbleException e) {
|
||||
throw new TribbleException.InternalCodecException(e.getMessage() + ", at position " + chr+":"+pos);
|
||||
}
|
||||
}
|
||||
|
||||
return new LazyGenotypesContext.LazyData(genotypes, header.sampleNamesInOrder, header.sampleNameToOffset);
|
||||
return fFields;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -56,8 +56,9 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
|||
/**
 * @return the description held by this header line
 */
public String getDescription() {
    return this.description;
}
|
||||
/**
 * @return the value type held by this header line
 */
public VCFHeaderLineType getType() {
    return this.type;
}
|
||||
/**
 * @return the count type held by this header line
 */
public VCFHeaderLineCount getCountType() {
    return this.countType;
}
|
||||
/**
 * @return true if and only if the count type of this line is
 *         {@code VCFHeaderLineCount.INTEGER}
 */
public boolean isFixedCount() {
    return VCFHeaderLineCount.INTEGER == this.countType;
}
|
||||
public int getCount() {
|
||||
if ( countType != VCFHeaderLineCount.INTEGER )
|
||||
if ( ! isFixedCount() )
|
||||
throw new ReviewedStingException("Asking for header line count when type is not an integer");
|
||||
return count;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@ public final class VCFConstants {
|
|||
public static final String GENOTYPE_LIKELIHOODS_KEY = "GL"; // log10 scaled genotype likelihoods
|
||||
public static final String GENOTYPE_POSTERIORS_KEY = "GP";
|
||||
public static final String GENOTYPE_QUALITY_KEY = "GQ";
|
||||
public static final String GENOTYPE_ALLELE_DEPTHS = "AD";
|
||||
public static final String HAPMAP2_KEY = "H2";
|
||||
public static final String HAPMAP3_KEY = "H3";
|
||||
public static final String HAPLOTYPE_QUALITY_KEY = "HQ";
|
||||
|
|
@ -113,7 +114,5 @@ public final class VCFConstants {
|
|||
public static final String EMPTY_GENOTYPE = "./.";
|
||||
public static final int MAX_GENOTYPE_QUAL = 99;
|
||||
|
||||
public static final String DOUBLE_PRECISION_FORMAT_STRING = "%.2f";
|
||||
public static final String DOUBLE_PRECISION_INT_SUFFIX = ".00";
|
||||
public static final Double VCF_ENCODING_EPSILON = 0.00005; // when we consider fields equal(), used in the Qual compare
|
||||
}
|
||||
|
|
@ -24,12 +24,18 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
* This class is really a POS. It allows duplicate entries in the metadata,
|
||||
* stores header lines in lots of places, and all around f*cking sucks.
|
||||
*
|
||||
* todo -- clean this POS up
|
||||
*
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class VCFHeader
|
||||
|
|
@ -37,6 +43,7 @@ import java.util.*;
|
|||
* A class representing the VCF header
|
||||
*/
|
||||
public class VCFHeader {
|
||||
final protected static Logger logger = Logger.getLogger(VCFHeader.class);
|
||||
|
||||
// the mandatory header fields
|
||||
public enum HEADER_FIELDS {
|
||||
|
|
@ -68,8 +75,8 @@ public class VCFHeader {
|
|||
private boolean samplesWereAlreadySorted = true;
|
||||
|
||||
// cache for efficient conversion of VCF -> VariantContext
|
||||
protected ArrayList<String> sampleNamesInOrder = null;
|
||||
protected HashMap<String, Integer> sampleNameToOffset = null;
|
||||
private ArrayList<String> sampleNamesInOrder = null;
|
||||
private HashMap<String, Integer> sampleNameToOffset = null;
|
||||
|
||||
private boolean writeEngineHeaders = true;
|
||||
private boolean writeCommandLine = true;
|
||||
|
|
@ -164,10 +171,10 @@ public class VCFHeader {
|
|||
for ( VCFHeaderLine line : mMetaData ) {
|
||||
if ( line instanceof VCFInfoHeaderLine ) {
|
||||
VCFInfoHeaderLine infoLine = (VCFInfoHeaderLine)line;
|
||||
mInfoMetaData.put(infoLine.getID(), infoLine);
|
||||
addMetaDataMapBinding(mInfoMetaData, infoLine);
|
||||
} else if ( line instanceof VCFFormatHeaderLine ) {
|
||||
VCFFormatHeaderLine formatLine = (VCFFormatHeaderLine)line;
|
||||
mFormatMetaData.put(formatLine.getID(), formatLine);
|
||||
addMetaDataMapBinding(mFormatMetaData, formatLine);
|
||||
} else if ( line instanceof VCFContigHeaderLine ) {
|
||||
contigMetaData.add((VCFContigHeaderLine)line);
|
||||
} else {
|
||||
|
|
@ -176,6 +183,21 @@ public class VCFHeader {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add line to map, issuing warnings about duplicates
|
||||
*
|
||||
* @param map
|
||||
* @param line
|
||||
* @param <T>
|
||||
*/
|
||||
private final <T extends VCFCompoundHeaderLine> void addMetaDataMapBinding(final Map<String, T> map, T line) {
|
||||
final String key = line.getID();
|
||||
if ( map.containsKey(key) )
|
||||
logger.warn("Found duplicate VCF header lines for " + key + "; keeping the first only" );
|
||||
else
|
||||
map.put(key, line);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the header fields in order they're presented in the input file (which is now required to be
|
||||
* the order presented in the spec).
|
||||
|
|
@ -193,7 +215,7 @@ public class VCFHeader {
|
|||
*/
|
||||
public Set<VCFHeaderLine> getMetaData() {
|
||||
Set<VCFHeaderLine> lines = new LinkedHashSet<VCFHeaderLine>();
|
||||
lines.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_0.getFormatString(), VCFHeaderVersion.VCF4_0.getVersionString()));
|
||||
lines.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_1.getFormatString(), VCFHeaderVersion.VCF4_1.getVersionString()));
|
||||
lines.addAll(mMetaData);
|
||||
return Collections.unmodifiableSet(lines);
|
||||
}
|
||||
|
|
@ -221,13 +243,17 @@ public class VCFHeader {
|
|||
return mGenotypeSampleNames;
|
||||
}
|
||||
|
||||
public int getNGenotypeSamples() {
|
||||
return mGenotypeSampleNames.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* do we have genotyping data?
|
||||
*
|
||||
* @return true if we have genotyping columns, false otherwise
|
||||
*/
|
||||
public boolean hasGenotypingData() {
|
||||
return mGenotypeSampleNames.size() > 0;
|
||||
return getNGenotypeSamples() > 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -244,6 +270,14 @@ public class VCFHeader {
|
|||
return HEADER_FIELDS.values().length + (hasGenotypingData() ? mGenotypeSampleNames.size() + 1 : 0);
|
||||
}
|
||||
|
||||
public Collection<VCFInfoHeaderLine> getInfoHeaderLines() {
|
||||
return mInfoMetaData.values();
|
||||
}
|
||||
|
||||
public Collection<VCFFormatHeaderLine> getFormatHeaderLines() {
|
||||
return mFormatMetaData.values();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param id the header key name
|
||||
* @return the meta data line, or null if there is none
|
||||
|
|
@ -299,4 +333,12 @@ public class VCFHeader {
|
|||
public void setWriteCommandLine(boolean writeCommandLine) {
|
||||
this.writeCommandLine = writeCommandLine;
|
||||
}
|
||||
|
||||
public ArrayList<String> getSampleNamesInOrder() {
|
||||
return sampleNamesInOrder;
|
||||
}
|
||||
|
||||
public HashMap<String, Integer> getSampleNameToOffset() {
|
||||
return sampleNameToOffset;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -336,9 +336,15 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
* Clears all attributes except ReadGroup of the read.
|
||||
*/
|
||||
public GATKSAMRecord simplify () {
|
||||
GATKSAMReadGroupRecord rg = getReadGroup();
|
||||
this.clearAttributes();
|
||||
setReadGroup(rg);
|
||||
GATKSAMReadGroupRecord rg = getReadGroup(); // save the read group information
|
||||
byte[] insQuals = (this.getAttribute(BQSR_BASE_INSERTION_QUALITIES) == null) ? null : getBaseInsertionQualities();
|
||||
byte[] delQuals = (this.getAttribute(BQSR_BASE_DELETION_QUALITIES) == null) ? null : getBaseDeletionQualities();
|
||||
this.clearAttributes(); // clear all attributes from the read
|
||||
this.setReadGroup(rg); // restore read group
|
||||
if (insQuals != null)
|
||||
this.setBaseQualities(insQuals, EventType.BASE_INSERTION); // restore base insertion if we had any
|
||||
if (delQuals != null)
|
||||
this.setBaseQualities(delQuals, EventType.BASE_DELETION); // restore base deletion if we had any
|
||||
return this;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -336,7 +336,7 @@ public class Allele implements Comparable<Allele> {
|
|||
*
|
||||
* @return the segregating bases
|
||||
*/
|
||||
public String getBaseString() { return new String(getBases()); }
|
||||
public String getBaseString() { return isNoCall() ? NO_CALL_STRING : new String(getBases()); }
|
||||
|
||||
/**
|
||||
* Return the printed representation of this allele.
|
||||
|
|
|
|||
|
|
@ -226,12 +226,12 @@ final class CommonInfo {
|
|||
return Boolean.valueOf((String)x); // throws an exception if this isn't a string
|
||||
}
|
||||
|
||||
// public String getAttributeAsString(String key) { return (String.valueOf(getAttribute(key))); } // **NOTE**: will turn a null Object into the String "null"
|
||||
// public int getAttributeAsInt(String key) { Object x = getAttribute(key); return x instanceof Integer ? (Integer)x : Integer.valueOf((String)x); }
|
||||
// public double getAttributeAsDouble(String key) { Object x = getAttribute(key); return x instanceof Double ? (Double)x : Double.valueOf((String)x); }
|
||||
// public boolean getAttributeAsBoolean(String key) { Object x = getAttribute(key); return x instanceof Boolean ? (Boolean)x : Boolean.valueOf((String)x); }
|
||||
// public String getAttributeAsString(String key) { return (String.valueOf(getExtendedAttribute(key))); } // **NOTE**: will turn a null Object into the String "null"
|
||||
// public int getAttributeAsInt(String key) { Object x = getExtendedAttribute(key); return x instanceof Integer ? (Integer)x : Integer.valueOf((String)x); }
|
||||
// public double getAttributeAsDouble(String key) { Object x = getExtendedAttribute(key); return x instanceof Double ? (Double)x : Double.valueOf((String)x); }
|
||||
// public boolean getAttributeAsBoolean(String key) { Object x = getExtendedAttribute(key); return x instanceof Boolean ? (Boolean)x : Boolean.valueOf((String)x); }
|
||||
// public Integer getAttributeAsIntegerNoException(String key) { try {return getAttributeAsInt(key);} catch (Exception e) {return null;} }
|
||||
// public Double getAttributeAsDoubleNoException(String key) { try {return getAttributeAsDouble(key);} catch (Exception e) {return null;} }
|
||||
// public String getAttributeAsStringNoException(String key) { if (getAttribute(key) == null) return null; return getAttributeAsString(key); }
|
||||
// public String getAttributeAsStringNoException(String key) { if (getExtendedAttribute(key) == null) return null; return getAttributeAsString(key); }
|
||||
// public Boolean getAttributeAsBooleanNoException(String key) { try {return getAttributeAsBoolean(key);} catch (Exception e) {return null;} }
|
||||
}
|
||||
|
|
@ -0,0 +1,190 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.variantcontext;
|
||||
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* This class encompasses all the basic information about a genotype. It is immutable.
|
||||
*
|
||||
* A genotype has several key fields
|
||||
*
|
||||
* -- a sample name, must be a non-null string
|
||||
*
|
||||
* -- an ordered list of alleles, intrepreted as the genotype of the sample,
|
||||
* each allele for each chromosome given in order. If alleles = [a*, t]
|
||||
* then the sample is a/t, with a (the reference from the *) the first
|
||||
* chromosome and t on the second chromosome
|
||||
*
|
||||
* -- a isPhased marker indicting where the alleles are phased with respect to some global
|
||||
* coordinate system. See VCF4.1 spec for a detailed discussion
|
||||
*
|
||||
* -- Inline, optimized ints and int[] values for:
|
||||
* -- GQ: the phred-scaled genotype quality, of -1 if it's missing
|
||||
*
|
||||
* -- DP: the count of reads at this locus for this sample, of -1 if missing
|
||||
*
|
||||
* -- AD: an array of counts of reads at this locus, one for each Allele at the site.
|
||||
* that is, for each allele in the surrounding VariantContext. Null if missing.
|
||||
*
|
||||
* -- PL: phred-scaled genotype likelihoods in standard VCF4.1 order for
|
||||
* all combinations of the alleles in the surrounding VariantContext, given
|
||||
* the ploidy of the sample (from the alleles vector). Null if missing.
|
||||
*
|
||||
* -- A general map from String keys to -> Object values for all other attributes in
|
||||
* this genotype. Note that this map should not contain duplicate values for the
|
||||
* standard bindings for GQ, DP, AD, and PL. Genotype filters can be put into
|
||||
* this genotype, but it isn't respected by the GATK in analyses
|
||||
*
|
||||
* The only way to build a Genotype object is with a GenotypeBuilder, which permits values
|
||||
* to be set in any order, which means that GenotypeBuilder may at some in the chain of
|
||||
* sets pass through invalid states that are not permitted in a fully formed immutable
|
||||
* Genotype.
|
||||
*
|
||||
* Note this is a simplified, refactored Genotype object based on the original
|
||||
* generic (and slow) implementation from the original VariantContext + Genotype
|
||||
* codebase.
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 05/12
|
||||
*/
|
||||
public final class FastGenotype extends Genotype {
|
||||
private final List<Allele> alleles;
|
||||
private final boolean isPhased;
|
||||
private final int GQ;
|
||||
private final int DP;
|
||||
private final int[] AD;
|
||||
private final int[] PL;
|
||||
private final Map<String, Object> extendedAttributes;
|
||||
|
||||
/**
|
||||
* The only way to make one of these, for use by GenotypeBuilder only
|
||||
*
|
||||
* @param sampleName
|
||||
* @param alleles
|
||||
* @param isPhased
|
||||
* @param GQ
|
||||
* @param DP
|
||||
* @param AD
|
||||
* @param PL
|
||||
* @param extendedAttributes
|
||||
*/
|
||||
@Requires({
|
||||
"sampleName != null",
|
||||
"alleles != null",
|
||||
"GQ >= -1",
|
||||
"DP >= -1",
|
||||
"validADorPLField(AD)",
|
||||
"validADorPLField(PL)",
|
||||
"extendedAttributes != null",
|
||||
"! hasForbiddenKey(extendedAttributes)"})
|
||||
protected FastGenotype(final String sampleName,
|
||||
final List<Allele> alleles,
|
||||
final boolean isPhased,
|
||||
final int GQ,
|
||||
final int DP,
|
||||
final int[] AD,
|
||||
final int[] PL,
|
||||
final Map<String, Object> extendedAttributes) {
|
||||
super(sampleName);
|
||||
this.alleles = alleles;
|
||||
this.isPhased = isPhased;
|
||||
this.GQ = GQ;
|
||||
this.DP = DP;
|
||||
this.AD = AD;
|
||||
this.PL = PL;
|
||||
this.extendedAttributes = extendedAttributes;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Implmenting the abstract methods
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Override public List<Allele> getAlleles() {
|
||||
return alleles;
|
||||
}
|
||||
|
||||
@Override public Allele getAllele(int i) {
|
||||
return alleles.get(i);
|
||||
}
|
||||
|
||||
@Override public boolean isPhased() {
|
||||
return isPhased;
|
||||
}
|
||||
|
||||
@Override public int getDP() {
|
||||
return DP;
|
||||
}
|
||||
|
||||
@Override public int[] getAD() {
|
||||
return AD;
|
||||
}
|
||||
|
||||
@Override public int getGQ() {
|
||||
return GQ;
|
||||
}
|
||||
|
||||
@Override public List<String> getFilters() {
|
||||
return (List<String>) getExtendedAttribute(VCFConstants.GENOTYPE_FILTER_KEY, Collections.emptyList());
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean filtersWereApplied() {
|
||||
return hasExtendedAttribute(VCFConstants.GENOTYPE_FILTER_KEY);
|
||||
}
|
||||
|
||||
@Override public int[] getPL() {
|
||||
return PL;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// get routines for extended attributes
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
public Map<String, Object> getExtendedAttributes() {
|
||||
return extendedAttributes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is values a valid AD or PL field
|
||||
* @param values
|
||||
* @return
|
||||
*/
|
||||
private final static boolean validADorPLField(final int[] values) {
|
||||
if ( values != null )
|
||||
for ( int v : values )
|
||||
if ( v < 0 )
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,6 +1,9 @@
|
|||
package org.broadinstitute.sting.utils.variantcontext;
|
||||
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -12,132 +15,187 @@ import java.util.*;
|
|||
*
|
||||
* @author Mark DePristo
|
||||
*/
|
||||
public class Genotype implements Comparable<Genotype> {
|
||||
@Invariant({
|
||||
"getAlleles() != null",
|
||||
"getSampleName() != null",
|
||||
"getPloidy() >= 0",
|
||||
"! hasForbiddenKey(getExtendedAttributes())"})
|
||||
public abstract class Genotype implements Comparable<Genotype> {
|
||||
/**
|
||||
* A list of genotype field keys corresponding to values we
|
||||
* manage inline in the Genotype object. They must not appear in the
|
||||
* extended attributes map
|
||||
*/
|
||||
public final static Collection<String> PRIMARY_KEYS = Arrays.asList(
|
||||
VCFConstants.GENOTYPE_KEY,
|
||||
VCFConstants.GENOTYPE_QUALITY_KEY,
|
||||
VCFConstants.DEPTH_KEY,
|
||||
VCFConstants.GENOTYPE_ALLELE_DEPTHS,
|
||||
VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
|
||||
|
||||
public final static String PHASED_ALLELE_SEPARATOR = "|";
|
||||
public final static String UNPHASED_ALLELE_SEPARATOR = "/";
|
||||
|
||||
protected CommonInfo commonInfo;
|
||||
public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR;
|
||||
protected List<Allele> alleles = null; // new ArrayList<Allele>();
|
||||
protected Type type = null;
|
||||
private final String sampleName;
|
||||
private GenotypeType type = null;
|
||||
|
||||
protected boolean isPhased = false;
|
||||
|
||||
public Genotype(String sampleName, List<Allele> alleles, double log10PError, Set<String> filters, Map<String, Object> attributes, boolean isPhased) {
|
||||
this(sampleName, alleles, log10PError, filters, attributes, isPhased, null);
|
||||
protected Genotype(final String sampleName) {
|
||||
this.sampleName = sampleName;
|
||||
}
|
||||
|
||||
public Genotype(String sampleName, List<Allele> alleles, double log10PError, Set<String> filters, Map<String, Object> attributes, boolean isPhased, double[] log10Likelihoods) {
|
||||
if ( alleles == null || alleles.isEmpty() )
|
||||
this.alleles = Collections.emptyList();
|
||||
else
|
||||
this.alleles = Collections.unmodifiableList(alleles);
|
||||
commonInfo = new CommonInfo(sampleName, log10PError, filters, attributes);
|
||||
if ( log10Likelihoods != null )
|
||||
commonInfo.putAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(log10Likelihoods));
|
||||
this.isPhased = isPhased;
|
||||
validate();
|
||||
protected Genotype(final String sampleName, final GenotypeType type) {
|
||||
this.sampleName = sampleName;
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new Genotype for sampleName with genotype according to alleles.
|
||||
* @param sampleName
|
||||
* @param alleles
|
||||
* @param log10PError the confidence in these alleles
|
||||
* @param log10Likelihoods a log10 likelihoods for each of the genotype combinations possible for alleles, in the standard VCF ordering, or null if not known
|
||||
* @return the alleles for this genotype. Cannot be null. May be empty
|
||||
*/
|
||||
public Genotype(String sampleName, List<Allele> alleles, double log10PError, double[] log10Likelihoods) {
|
||||
this(sampleName, alleles, log10PError, null, null, false, log10Likelihoods);
|
||||
}
|
||||
|
||||
public Genotype(String sampleName, List<Allele> alleles, double log10PError) {
|
||||
this(sampleName, alleles, log10PError, null, null, false);
|
||||
}
|
||||
|
||||
public Genotype(String sampleName, List<Allele> alleles) {
|
||||
this(sampleName, alleles, NO_LOG10_PERROR, null, null, false);
|
||||
}
|
||||
|
||||
public Genotype(String sampleName, Genotype parent) {
|
||||
this(sampleName, parent.getAlleles(), parent.getLog10PError(), parent.getFilters(), parent.getAttributes(), parent.isPhased());
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Partial-cloning routines (because Genotype is immutable).
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
public static Genotype modifyName(Genotype g, String name) {
|
||||
return new Genotype(name, g.getAlleles(), g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased());
|
||||
}
|
||||
|
||||
public static Genotype modifyAttributes(Genotype g, Map<String, Object> attributes) {
|
||||
return new Genotype(g.getSampleName(), g.getAlleles(), g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attributes, g.isPhased());
|
||||
}
|
||||
|
||||
public static Genotype modifyAlleles(Genotype g, List<Allele> alleles) {
|
||||
return new Genotype(g.getSampleName(), alleles, g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased());
|
||||
}
|
||||
@Ensures("result != null")
|
||||
public abstract List<Allele> getAlleles();
|
||||
|
||||
/**
|
||||
* @return the alleles for this genotype
|
||||
* Returns how many times allele appears in this genotype object.
|
||||
*
|
||||
* @param allele
|
||||
* @return a value >= 0 indicating how many times the allele occurred in this sample's genotype
|
||||
*/
|
||||
public List<Allele> getAlleles() {
|
||||
return alleles;
|
||||
}
|
||||
|
||||
public List<Allele> getAlleles(Allele allele) {
|
||||
List<Allele> al = new ArrayList<Allele>();
|
||||
for ( Allele a : alleles )
|
||||
@Requires("allele != null")
|
||||
@Ensures("result >= 0")
|
||||
public int countAllele(final Allele allele) {
|
||||
int c = 0;
|
||||
for ( final Allele a : getAlleles() )
|
||||
if ( a.equals(allele) )
|
||||
al.add(a);
|
||||
c++;
|
||||
|
||||
return Collections.unmodifiableList(al);
|
||||
return c;
|
||||
}
|
||||
|
||||
public Allele getAllele(int i) {
|
||||
if ( getType() == Type.UNAVAILABLE )
|
||||
throw new ReviewedStingException("Requesting alleles for an UNAVAILABLE genotype");
|
||||
return alleles.get(i);
|
||||
}
|
||||
|
||||
/**
 * @return the value of the isPhased field
 */
public boolean isPhased() {
    return isPhased;
}
|
||||
|
||||
/**
|
||||
* Get the ith allele in this genotype
|
||||
*
|
||||
* @param i the ith allele, must be < the ploidy, starting with 0
|
||||
* @return the allele at position i, which cannot be null
|
||||
*/
|
||||
@Requires({"i >=0 && i < getPloidy()", "getType() != GenotypeType.UNAVAILABLE"})
|
||||
@Ensures("result != null")
|
||||
public abstract Allele getAllele(int i);
|
||||
|
||||
/**
|
||||
* Are the alleles phased w.r.t. the global phasing system?
|
||||
*
|
||||
* @return true if yes
|
||||
*/
|
||||
public abstract boolean isPhased();
|
||||
|
||||
/**
|
||||
* What is the ploidy of this sample?
|
||||
*
|
||||
* @return the ploidy of this genotype. 0 if the site is no-called.
|
||||
*/
|
||||
@Ensures("result >= 0")
|
||||
public int getPloidy() {
|
||||
return alleles.size();
|
||||
return getAlleles().size();
|
||||
}
|
||||
|
||||
public enum Type {
|
||||
NO_CALL,
|
||||
HOM_REF,
|
||||
HET,
|
||||
HOM_VAR,
|
||||
UNAVAILABLE,
|
||||
MIXED // no-call and call in the same genotype
|
||||
/**
|
||||
* @return the sequencing depth of this sample, or -1 if this value is missing
|
||||
*/
|
||||
@Ensures("result >= -1")
|
||||
public abstract int getDP();
|
||||
|
||||
/**
|
||||
* @return the count of reads, one for each allele in the surrounding Variant context,
|
||||
* matching the corresponding allele, or null if this value is missing. MUST
|
||||
* NOT BE MODIFIED!
|
||||
*/
|
||||
public abstract int[] getAD();
|
||||
|
||||
/**
|
||||
* Returns the name associated with this sample.
|
||||
*
|
||||
* @return a non-null String
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public String getSampleName() {
|
||||
return sampleName;
|
||||
}
|
||||
|
||||
public Type getType() {
|
||||
/**
|
||||
* Returns a phred-scaled quality score, or -1 if none is available
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result >= -1")
|
||||
public abstract int getGQ();
|
||||
|
||||
/**
|
||||
* Does the PL field have a value?
|
||||
* @return true if there's a PL field value
|
||||
*/
|
||||
@Ensures("(result == false && getPL() == null) || (result == true && getPL() != null)")
|
||||
public boolean hasPL() {
|
||||
return getPL() != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does the AD field have a value?
|
||||
* @return true if there's a AD field value
|
||||
*/
|
||||
@Ensures("(result == false && getAD() == null) || (result == true && getAD() != null)")
|
||||
public boolean hasAD() {
|
||||
return getAD() != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does the GQ field have a value?
|
||||
* @return true if there's a GQ field value
|
||||
*/
|
||||
@Ensures("(result == false && getGQ() == -1) || (result == true && getGQ() >= 0)")
|
||||
public boolean hasGQ() {
|
||||
return getGQ() != -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does the DP field have a value?
|
||||
* @return true if there's a DP field value
|
||||
*/
|
||||
@Ensures("(result == false && getDP() == -1) || (result == true && getDP() >= 0)")
|
||||
public boolean hasDP() {
|
||||
return getDP() != -1;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// The type of this genotype
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* @return the high-level type of this sample's genotype
|
||||
*/
|
||||
@Ensures({"type != null", "result != null"})
|
||||
public GenotypeType getType() {
|
||||
if ( type == null ) {
|
||||
type = determineType();
|
||||
}
|
||||
return type;
|
||||
}
|
||||
|
||||
protected Type determineType() {
|
||||
if ( alleles.size() == 0 )
|
||||
return Type.UNAVAILABLE;
|
||||
/**
|
||||
* Internal code to determine the type of the genotype from the alleles vector
|
||||
* @return the type
|
||||
*/
|
||||
@Requires("type == null") // we should never call if already calculated
|
||||
protected GenotypeType determineType() {
|
||||
// TODO -- this code is slow and could be optimized for the diploid case
|
||||
final List<Allele> alleles = getAlleles();
|
||||
if ( alleles.isEmpty() )
|
||||
return GenotypeType.UNAVAILABLE;
|
||||
|
||||
boolean sawNoCall = false, sawMultipleAlleles = false;
|
||||
Allele observedAllele = null;
|
||||
|
||||
for ( Allele allele : alleles ) {
|
||||
for ( final Allele allele : alleles ) {
|
||||
if ( allele.isNoCall() )
|
||||
sawNoCall = true;
|
||||
else if ( observedAllele == null )
|
||||
|
|
@ -148,14 +206,14 @@ public class Genotype implements Comparable<Genotype> {
|
|||
|
||||
if ( sawNoCall ) {
|
||||
if ( observedAllele == null )
|
||||
return Type.NO_CALL;
|
||||
return Type.MIXED;
|
||||
return GenotypeType.NO_CALL;
|
||||
return GenotypeType.MIXED;
|
||||
}
|
||||
|
||||
if ( observedAllele == null )
|
||||
throw new ReviewedStingException("BUG: there are no alleles present in this genotype but the alleles list is not null");
|
||||
|
||||
return sawMultipleAlleles ? Type.HET : observedAllele.isReference() ? Type.HOM_REF : Type.HOM_VAR;
|
||||
return sawMultipleAlleles ? GenotypeType.HET : observedAllele.isReference() ? GenotypeType.HOM_REF : GenotypeType.HOM_VAR;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -166,101 +224,108 @@ public class Genotype implements Comparable<Genotype> {
|
|||
/**
|
||||
* @return true if all observed alleles are ref; if any alleles are no-calls, this method will return false.
|
||||
*/
|
||||
public boolean isHomRef() { return getType() == Type.HOM_REF; }
|
||||
public boolean isHomRef() { return getType() == GenotypeType.HOM_REF; }
|
||||
|
||||
/**
|
||||
* @return true if all observed alleles are alt; if any alleles are no-calls, this method will return false.
|
||||
*/
|
||||
public boolean isHomVar() { return getType() == Type.HOM_VAR; }
|
||||
|
||||
public boolean isHomVar() { return getType() == GenotypeType.HOM_VAR; }
|
||||
|
||||
/**
|
||||
* @return true if we're het (observed alleles differ); if the ploidy is less than 2 or if any alleles are no-calls, this method will return false.
|
||||
*/
|
||||
public boolean isHet() { return getType() == Type.HET; }
|
||||
public boolean isHet() { return getType() == GenotypeType.HET; }
|
||||
|
||||
/**
|
||||
* @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF); if any alleles are not no-calls (even if some are), this method will return false.
|
||||
*/
|
||||
public boolean isNoCall() { return getType() == Type.NO_CALL; }
|
||||
public boolean isNoCall() { return getType() == GenotypeType.NO_CALL; }
|
||||
|
||||
/**
|
||||
* @return true if this genotype is comprised of any alleles that are not no-calls (even if some are).
|
||||
*/
|
||||
public boolean isCalled() { return getType() != Type.NO_CALL && getType() != Type.UNAVAILABLE; }
|
||||
public boolean isCalled() { return getType() != GenotypeType.NO_CALL && getType() != GenotypeType.UNAVAILABLE; }
|
||||
|
||||
/**
|
||||
* @return true if this genotype is comprised of both calls and no-calls.
|
||||
*/
|
||||
public boolean isMixed() { return getType() == Type.MIXED; }
|
||||
public boolean isMixed() { return getType() == GenotypeType.MIXED; }
|
||||
|
||||
/**
|
||||
* @return true if the type of this genotype is set.
|
||||
*/
|
||||
public boolean isAvailable() { return getType() != Type.UNAVAILABLE; }
|
||||
public boolean isAvailable() { return getType() != GenotypeType.UNAVAILABLE; }
|
||||
|
||||
// ------------------------------------------------------------------------------
|
||||
//
|
||||
// Useful methods for getting genotype likelihoods for a genotype object, if present
|
||||
// methods for getting genotype likelihoods for a genotype object, if present
|
||||
//
|
||||
// ------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* @return Returns true if this Genotype has PL field values
|
||||
*/
|
||||
@Ensures("(result && getLikelihoods() != null) || (! result && getLikelihoods() == null)")
|
||||
public boolean hasLikelihoods() {
|
||||
return (hasAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) && !getAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY).equals(VCFConstants.MISSING_VALUE_v4)) ||
|
||||
(hasAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) && !getAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY).equals(VCFConstants.MISSING_VALUE_v4));
|
||||
return getPL() != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience function that returns a string representation of the PL field of this
|
||||
* genotype, or . if none is available.
|
||||
*
|
||||
* @return
|
||||
* @return a non-null String representation for the PL of this sample
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public String getLikelihoodsString() {
|
||||
GenotypeLikelihoods gl = getLikelihoods();
|
||||
return gl == null ? VCFConstants.MISSING_VALUE_v4 : gl.toString();
|
||||
return hasLikelihoods() ? getLikelihoods().toString() : VCFConstants.MISSING_VALUE_v4;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the GenotypesLikelihoods data associated with this Genotype, or null if missing
|
||||
* @return null or a GenotypesLikelihood object for this sample's PL field
|
||||
*/
|
||||
@Ensures("(hasLikelihoods() && result != null) || (! hasLikelihoods() && result == null)")
|
||||
public GenotypeLikelihoods getLikelihoods() {
|
||||
GenotypeLikelihoods x = getLikelihoods(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, true);
|
||||
if ( x != null )
|
||||
return x;
|
||||
else {
|
||||
x = getLikelihoods(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, false);
|
||||
return x;
|
||||
}
|
||||
return hasLikelihoods() ? GenotypeLikelihoods.fromPLs(getPL()) : null;
|
||||
}
|
||||
|
||||
private GenotypeLikelihoods getLikelihoods(String key, boolean asPL) {
|
||||
Object x = getAttribute(key);
|
||||
if ( x instanceof String ) {
|
||||
if ( asPL )
|
||||
return GenotypeLikelihoods.fromPLField((String)x);
|
||||
else
|
||||
return GenotypeLikelihoods.fromGLField((String)x);
|
||||
}
|
||||
else if ( x instanceof GenotypeLikelihoods ) return (GenotypeLikelihoods)x;
|
||||
else return null;
|
||||
}
|
||||
/**
|
||||
* Unsafe low-level accessor the PL field itself, may be null.
|
||||
*
|
||||
* @return a pointer to the underlying PL data. MUST NOT BE MODIFIED!
|
||||
*/
|
||||
public abstract int[] getPL();
|
||||
|
||||
public void validate() {
|
||||
if ( alleles.size() == 0) return;
|
||||
|
||||
// int nNoCalls = 0;
|
||||
for ( Allele allele : alleles ) {
|
||||
if ( allele == null )
|
||||
throw new IllegalArgumentException("BUG: allele cannot be null in Genotype");
|
||||
// nNoCalls += allele.isNoCall() ? 1 : 0;
|
||||
}
|
||||
|
||||
// Technically, the spec does allow for the below case so this is not an illegal state
|
||||
//if ( nNoCalls > 0 && nNoCalls != alleles.size() )
|
||||
// throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this);
|
||||
}
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Many different string representations
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Return a VCF-like string representation for the alleles of this genotype.
|
||||
*
|
||||
* Does not append the reference * marker on the alleles.
|
||||
*
|
||||
* @return a string representing the genotypes, or null if the type is unavailable.
|
||||
*/
|
||||
@Ensures("result != null || ! isAvailable()")
|
||||
public String getGenotypeString() {
|
||||
return getGenotypeString(true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a VCF-like string representation for the alleles of this genotype.
|
||||
*
|
||||
* If ignoreRefState is true, will not append the reference * marker on the alleles.
|
||||
*
|
||||
* @return a string representing the genotypes, or null if the type is unavailable.
|
||||
*/
|
||||
@Ensures("result != null || ! isAvailable()")
|
||||
public String getGenotypeString(boolean ignoreRefState) {
|
||||
if ( alleles.size() == 0 )
|
||||
return null;
|
||||
if ( getPloidy() == 0 )
|
||||
return "NA";
|
||||
|
||||
// Notes:
|
||||
// 1. Make sure to use the appropriate separator depending on whether the genotype is phased
|
||||
|
|
@ -270,29 +335,54 @@ public class Genotype implements Comparable<Genotype> {
|
|||
ignoreRefState ? getAlleleStrings() : (isPhased() ? getAlleles() : ParsingUtils.sortList(getAlleles())));
|
||||
}
|
||||
|
||||
private List<String> getAlleleStrings() {
|
||||
List<String> al = new ArrayList<String>();
|
||||
for ( Allele a : alleles )
|
||||
/**
|
||||
* Utility that returns a list of allele strings corresponding to the alleles in this sample
|
||||
* @return
|
||||
*/
|
||||
protected List<String> getAlleleStrings() {
|
||||
final List<String> al = new ArrayList<String>(getPloidy());
|
||||
for ( Allele a : getAlleles() )
|
||||
al.add(a.getBaseString());
|
||||
|
||||
return al;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
int Q = getPhredScaledQual();
|
||||
return String.format("[%s %s Q%s %s]", getSampleName(), getGenotypeString(false),
|
||||
Q == -1 ? "." : String.format("%2d",Q), sortedString(getAttributes()));
|
||||
return String.format("[%s %s%s%s%s%s%s]",
|
||||
getSampleName(),
|
||||
getGenotypeString(false),
|
||||
toStringIfExists(VCFConstants.GENOTYPE_QUALITY_KEY, getGQ()),
|
||||
toStringIfExists(VCFConstants.DEPTH_KEY, getDP()),
|
||||
toStringIfExists(VCFConstants.GENOTYPE_ALLELE_DEPTHS, getAD()),
|
||||
toStringIfExists(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, getPL()),
|
||||
sortedString(getExtendedAttributes()));
|
||||
}
|
||||
|
||||
public String toBriefString() {
|
||||
return String.format("%s:Q%d", getGenotypeString(false), getPhredScaledQual());
|
||||
return String.format("%s:Q%d", getGenotypeString(false), getGQ());
|
||||
}
|
||||
|
||||
public boolean sameGenotype(Genotype other) {
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Comparison operations
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* comparable genotypes -> compareTo on the sample names
|
||||
* @param genotype
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public int compareTo(final Genotype genotype) {
|
||||
return getSampleName().compareTo(genotype.getSampleName());
|
||||
}
|
||||
|
||||
public boolean sameGenotype(final Genotype other) {
|
||||
return sameGenotype(other, true);
|
||||
}
|
||||
|
||||
public boolean sameGenotype(Genotype other, boolean ignorePhase) {
|
||||
public boolean sameGenotype(final Genotype other, boolean ignorePhase) {
|
||||
if (getPloidy() != other.getPloidy())
|
||||
return false; // gotta have the same number of allele to be equal
|
||||
|
||||
|
|
@ -308,6 +398,146 @@ public class Genotype implements Comparable<Genotype> {
|
|||
return thisAlleles.equals(otherAlleles);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// get routines for extended attributes
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Returns the extended attributes for this object
|
||||
* @return is never null, but is often isEmpty()
|
||||
*/
|
||||
@Ensures({"result != null", "! hasForbiddenKey(result)"})
|
||||
public abstract Map<String, Object> getExtendedAttributes();
|
||||
|
||||
/**
|
||||
* Is key associated with a value (even a null one) in the extended attributes?
|
||||
*
|
||||
* Note this will not return true for the inline attributes DP, GQ, AD, or PL
|
||||
*
|
||||
* @param key a non-null string key to check for an association
|
||||
* @return true if key has a value in the extendedAttributes
|
||||
*/
|
||||
@Requires({"key != null", "! isForbiddenKey(key)"})
|
||||
public boolean hasExtendedAttribute(final String key) {
|
||||
return getExtendedAttributes().containsKey(key);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the extended attribute value associated with key, if possible
|
||||
*
|
||||
* @param key a non-null string key to fetch a value for
|
||||
* @param defaultValue the value to return if key isn't in the extended attributes
|
||||
* @return a value (potentially) null associated with key, or defaultValue if no association exists
|
||||
*/
|
||||
@Requires({"key != null", "! isForbiddenKey(key)"})
|
||||
@Ensures("hasExtendedAttribute(key) || result == defaultValue")
|
||||
public Object getExtendedAttribute(final String key, final Object defaultValue) {
|
||||
return hasExtendedAttribute(key) ? getExtendedAttributes().get(key) : defaultValue;
|
||||
}
|
||||
|
||||
/**
|
||||
* Same as #getExtendedAttribute with a null default
|
||||
*
|
||||
* @param key
|
||||
* @return
|
||||
*/
|
||||
public Object getExtendedAttribute(final String key) {
|
||||
return getExtendedAttribute(key, null);
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Ensures({"result != null", "filtersWereApplied() || result.isEmpty()"})
|
||||
public abstract List<String> getFilters();
|
||||
|
||||
@Ensures({"result != getFilters().isEmpty()"})
|
||||
public boolean isFiltered() {
|
||||
return ! getFilters().isEmpty();
|
||||
}
|
||||
|
||||
@Ensures("result == true || getFilters().isEmpty()")
|
||||
public abstract boolean filtersWereApplied();
|
||||
|
||||
/**
 * @return whatever {@link #hasGQ()} returns
 * @deprecated forwards directly to {@link #hasGQ()}
 */
@Deprecated
public boolean hasLog10PError() {
    return hasGQ();
}
|
||||
/**
 * @return {@link #getGQ()} divided by -10.0
 * @deprecated derived from {@link #getGQ()}
 */
@Deprecated
public double getLog10PError() {
    // NOTE(review): a missing GQ (-1) comes out as 0.1 here, not as a sentinel -- confirm callers check hasLog10PError() first
    final int gq = getGQ();
    return gq / -10.0;
}
|
||||
/**
 * @return whatever {@link #getGQ()} returns
 * @deprecated forwards directly to {@link #getGQ()}
 */
@Deprecated
public int getPhredScaledQual() {
    return getGQ();
}
|
||||
|
||||
@Deprecated
|
||||
public String getAttributeAsString(String key, String defaultValue) {
|
||||
Object x = getExtendedAttribute(key);
|
||||
if ( x == null ) return defaultValue;
|
||||
if ( x instanceof String ) return (String)x;
|
||||
return String.valueOf(x); // throws an exception if this isn't a string
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public int getAttributeAsInt(String key, int defaultValue) {
|
||||
Object x = getExtendedAttribute(key);
|
||||
if ( x == null || x == VCFConstants.MISSING_VALUE_v4 ) return defaultValue;
|
||||
if ( x instanceof Integer ) return (Integer)x;
|
||||
return Integer.valueOf((String)x); // throws an exception if this isn't a string
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public double getAttributeAsDouble(String key, double defaultValue) {
|
||||
Object x = getExtendedAttribute(key);
|
||||
if ( x == null ) return defaultValue;
|
||||
if ( x instanceof Double ) return (Double)x;
|
||||
return Double.valueOf((String)x); // throws an exception if this isn't a string
|
||||
}
|
||||
|
||||
/**
|
||||
* A totally generic getter, that allows you to specific keys that correspond
|
||||
* to even inline values (GQ, for example). Can be very expensive. Additionally,
|
||||
* all int[] are converted inline into List<Integer> for convenience.
|
||||
*
|
||||
* @param key
|
||||
* @return
|
||||
*/
|
||||
public Object getAnyAttribute(final String key) {
|
||||
if (key.equals(VCFConstants.GENOTYPE_KEY)) {
|
||||
return getAlleles();
|
||||
} else if (key.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
|
||||
return getGQ();
|
||||
} else if (key.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS)) {
|
||||
return Arrays.asList(getAD());
|
||||
} else if (key.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY)) {
|
||||
return Arrays.asList(getPL());
|
||||
} else if (key.equals(VCFConstants.DEPTH_KEY)) {
|
||||
return getDP();
|
||||
} else {
|
||||
return getExtendedAttribute(key);
|
||||
}
|
||||
}
|
||||
|
||||
public boolean hasAnyAttribute(final String key) {
|
||||
if (key.equals(VCFConstants.GENOTYPE_KEY)) {
|
||||
return isAvailable();
|
||||
} else if (key.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
|
||||
return hasGQ();
|
||||
} else if (key.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS)) {
|
||||
return hasAD();
|
||||
} else if (key.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY)) {
|
||||
return hasPL();
|
||||
} else if (key.equals(VCFConstants.DEPTH_KEY)) {
|
||||
return hasDP();
|
||||
} else {
|
||||
return hasExtendedAttribute(key);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO -- add getAttributesAsX interface here
|
||||
|
||||
// ------------------------------------------------------------------------------
|
||||
//
|
||||
// private utilities
|
||||
//
|
||||
// ------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* a utility method for generating sorted strings from a map key set.
|
||||
* @param c the map
|
||||
|
|
@ -315,63 +545,70 @@ public class Genotype implements Comparable<Genotype> {
|
|||
* @param <V> the value type
|
||||
* @return a string, enclosed in {}, with comma-separated key=value pairs in order of the keys
|
||||
*/
|
||||
private static <T extends Comparable<T>, V> String sortedString(Map<T, V> c) {
|
||||
@Requires("c != null")
|
||||
protected static <T extends Comparable<T>, V> String sortedString(Map<T, V> c) {
|
||||
|
||||
// NOTE -- THIS IS COPIED FROM GATK UTILS TO ALLOW US TO KEEP A SEPARATION BETWEEN THE GATK AND VCF CODECS
|
||||
List<T> t = new ArrayList<T>(c.keySet());
|
||||
final List<T> t = new ArrayList<T>(c.keySet());
|
||||
Collections.sort(t);
|
||||
|
||||
List<String> pairs = new ArrayList<String>();
|
||||
for (T k : t) {
|
||||
final List<String> pairs = new ArrayList<String>();
|
||||
for (final T k : t) {
|
||||
pairs.add(k + "=" + c.get(k));
|
||||
}
|
||||
|
||||
return "{" + ParsingUtils.join(", ", pairs.toArray(new String[pairs.size()])) + "}";
|
||||
}
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// get routines to access context info fields
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
/** @return the name held by commonInfo */
public String getSampleName() {
    return commonInfo.getName();
}
|
||||
/** @return the filters held by commonInfo */
public Set<String> getFilters() {
    return commonInfo.getFilters();
}
|
||||
/** @return the result of commonInfo.getFiltersMaybeNull() */
public Set<String> getFiltersMaybeNull() {
    return commonInfo.getFiltersMaybeNull();
}
|
||||
/** @return the result of commonInfo.isFiltered() */
public boolean isFiltered() {
    return commonInfo.isFiltered();
}
|
||||
/** @return the result of commonInfo.isNotFiltered() */
public boolean isNotFiltered() {
    return commonInfo.isNotFiltered();
}
|
||||
/** @return the result of commonInfo.filtersWereApplied() */
public boolean filtersWereApplied() {
    return commonInfo.filtersWereApplied();
}
|
||||
/** @return the result of commonInfo.hasLog10PError() */
public boolean hasLog10PError() {
    return commonInfo.hasLog10PError();
}
|
||||
/** @return the log10 error value held by commonInfo */
public double getLog10PError() {
    return commonInfo.getLog10PError();
}
|
||||
|
||||
/**
|
||||
* Returns a phred-scaled quality score, or -1 if none is available
|
||||
* @return
|
||||
* Returns a display name for field name with value v if this isn't -1. Otherwise returns ""
|
||||
* @param name of the field ("AD")
|
||||
* @param v the value of the field, or -1 if missing
|
||||
* @return a non-null string for display if the field is not missing
|
||||
*/
|
||||
public int getPhredScaledQual() {
|
||||
final int i = (int)Math.round(commonInfo.getPhredScaledQual());
|
||||
return i < 0 ? -1 : i;
|
||||
@Requires("name != null")
|
||||
@Ensures("result != null")
|
||||
protected final static String toStringIfExists(final String name, final int v) {
|
||||
return v == -1 ? "" : " " + name + " " + v;
|
||||
}
|
||||
|
||||
/** @return the attribute map held by commonInfo */
public Map<String, Object> getAttributes() {
    return commonInfo.getAttributes();
}
|
||||
/**
 * @param key the attribute key to test
 * @return the result of commonInfo.hasAttribute(key)
 */
public boolean hasAttribute(String key) {
    return commonInfo.hasAttribute(key);
}
|
||||
/**
 * @param key the attribute key to fetch
 * @return the result of commonInfo.getAttribute(key)
 */
public Object getAttribute(String key) {
    return commonInfo.getAttribute(key);
}
|
||||
|
||||
public Object getAttribute(String key, Object defaultValue) {
|
||||
return commonInfo.getAttribute(key, defaultValue);
|
||||
/**
|
||||
* Returns a display name for field name with values vs if this isn't null. Otherwise returns ""
|
||||
* @param name of the field ("AD")
|
||||
* @param vs the value of the field, or null if missing
|
||||
* @return a non-null string for display if the field is not missing
|
||||
*/
|
||||
@Requires("name != null")
|
||||
@Ensures("result != null")
|
||||
protected final static String toStringIfExists(final String name, final int[] vs) {
|
||||
if ( vs == null )
|
||||
return "";
|
||||
else {
|
||||
StringBuilder b = new StringBuilder();
|
||||
b.append(" ").append(name).append(" ");
|
||||
for ( int i = 0; i < vs.length; i++ ) {
|
||||
if ( i != 0 ) b.append(",");
|
||||
b.append(vs[i]);
|
||||
}
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * @param key          the attribute key to fetch
 * @param defaultValue forwarded to commonInfo as the fallback value
 * @return the result of commonInfo.getAttributeAsString(key, defaultValue)
 */
public String getAttributeAsString(String key, String defaultValue) {
    return commonInfo.getAttributeAsString(key, defaultValue);
}
|
||||
/**
 * @param key          the attribute key to fetch
 * @param defaultValue forwarded to commonInfo as the fallback value
 * @return the result of commonInfo.getAttributeAsInt(key, defaultValue)
 */
public int getAttributeAsInt(String key, int defaultValue) {
    return commonInfo.getAttributeAsInt(key, defaultValue);
}
|
||||
/**
 * @param key          the attribute key to fetch
 * @param defaultValue forwarded to commonInfo as the fallback value
 * @return the result of commonInfo.getAttributeAsDouble(key, defaultValue)
 */
public double getAttributeAsDouble(String key, double defaultValue) {
    return commonInfo.getAttributeAsDouble(key, defaultValue);
}
|
||||
/**
 * @param key          the attribute key to fetch
 * @param defaultValue forwarded to commonInfo as the fallback value
 * @return the result of commonInfo.getAttributeAsBoolean(key, defaultValue)
 */
public boolean getAttributeAsBoolean(String key, boolean defaultValue) {
    return commonInfo.getAttributeAsBoolean(key, defaultValue);
}
|
||||
|
||||
/**
|
||||
* comparable genotypes -> compareTo on the sample names
|
||||
* @param genotype
|
||||
* Does the attribute map have a mapping involving a forbidden key (i.e.,
|
||||
* one that's managed inline by this Genotypes object?
|
||||
*
|
||||
* @param attributes the extended attributes key
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public int compareTo(final Genotype genotype) {
|
||||
return getSampleName().compareTo(genotype.getSampleName());
|
||||
protected final static boolean hasForbiddenKey(final Map<String, Object> attributes) {
|
||||
for ( final String forbidden : PRIMARY_KEYS)
|
||||
if ( attributes.containsKey(forbidden) )
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
protected final static boolean isForbiddenKey(final String key) {
|
||||
return PRIMARY_KEYS.contains(key);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,417 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.variantcontext;
|
||||
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* A builder class for genotypes
|
||||
*
|
||||
* Provides convenience setter methods for all of the Genotype field
|
||||
* values. Setter methods can be used in any order, allowing you to
|
||||
* pass through states that wouldn't be allowed in the highly regulated
|
||||
* immutable Genotype class.
|
||||
*
|
||||
* All fields default to meaningful MISSING values.
|
||||
*
|
||||
* Call make() to actually create the corresponding Genotype object from
|
||||
* this builder. Can be called multiple times to create independent copies,
|
||||
* or with intervening sets to conveniently make similar Genotypes with
|
||||
* slight modifications.
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 06/12
|
||||
*/
|
||||
public final class GenotypeBuilder {
|
||||
public static boolean MAKE_FAST_BY_DEFAULT = true;
|
||||
|
||||
private String sampleName = null;
|
||||
private List<Allele> alleles = Collections.emptyList();
|
||||
|
||||
private boolean isPhased = false;
|
||||
private int GQ = -1;
|
||||
private int DP = -1;
|
||||
private int[] AD = null;
|
||||
private int[] PL = null;
|
||||
private Map<String, Object> extendedAttributes = null;
|
||||
private int initialAttributeMapSize = 5;
|
||||
|
||||
private boolean useFast = MAKE_FAST_BY_DEFAULT;
|
||||
|
||||
private final static Map<String, Object> NO_ATTRIBUTES =
|
||||
Collections.unmodifiableMap(new HashMap<String, Object>(0));
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
//
|
||||
// Factory methods
|
||||
//
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
public final static Genotype create(final String sampleName, final List<Allele> alleles) {
|
||||
return new GenotypeBuilder(sampleName, alleles).make();
|
||||
}
|
||||
|
||||
public final static Genotype create(final String sampleName,
|
||||
final List<Allele> alleles,
|
||||
final Map<String, Object> attributes) {
|
||||
return new GenotypeBuilder(sampleName, alleles).attributes(attributes).make();
|
||||
}
|
||||
|
||||
protected final static Genotype create(final String sampleName,
|
||||
final List<Allele> alleles,
|
||||
final double[] gls) {
|
||||
return new GenotypeBuilder(sampleName, alleles).PL(gls).make();
|
||||
}
|
||||
|
||||
public final static Genotype create(final String sampleName,
|
||||
final List<Allele> alleles,
|
||||
final double log10Perror,
|
||||
final Map<String, Object> attributes) {
|
||||
return new GenotypeBuilder(sampleName, alleles)
|
||||
.GQ(log10Perror == SlowGenotype.NO_LOG10_PERROR ? -1 : (int)(log10Perror * -10))
|
||||
.attributes(attributes).make();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a empty builder. Both a sampleName and alleles must be provided
|
||||
* before trying to make a Genotype from this builder.
|
||||
*/
|
||||
public GenotypeBuilder() {}
|
||||
|
||||
/**
 * Create a builder that starts with a sample name only. Alleles must be
 * provided before trying to make a Genotype from this builder.
 *
 * @param sampleName passed to the name(...) setter
 */
public GenotypeBuilder(final String sampleName) {
    this.name(sampleName);
}
|
||||
|
||||
/**
 * Create a builder that starts with a sample name and alleles.
 *
 * @param sampleName passed to the name(...) setter
 * @param alleles    passed to the alleles(...) setter
 */
public GenotypeBuilder(final String sampleName, final List<Allele> alleles) {
    this.name(sampleName);
    this.alleles(alleles);
}
|
||||
|
||||
/**
 * Create a builder whose starting values are copied from an existing Genotype.
 *
 * @param g the genotype handed to {@link #copy(Genotype)}
 */
public GenotypeBuilder(final Genotype g) {
    this.copy(g);
}
|
||||
|
||||
/**
|
||||
* Copy all of the values for this builder from Genotype g
|
||||
* @param g
|
||||
* @return
|
||||
*/
|
||||
public GenotypeBuilder copy(final Genotype g) {
|
||||
name(g.getSampleName());
|
||||
alleles(g.getAlleles());
|
||||
phased(g.isPhased());
|
||||
GQ(g.getGQ());
|
||||
DP(g.getDP());
|
||||
AD(g.getAD());
|
||||
PL(g.getPL());
|
||||
attributes(g.getExtendedAttributes());
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reset all of the builder attributes to their defaults. After this
|
||||
* function you must provide sampleName and alleles before trying to
|
||||
* make more Genotypes.
|
||||
*/
|
||||
public final void reset() {
|
||||
sampleName = null;
|
||||
alleles = null;
|
||||
isPhased = false;
|
||||
GQ = -1;
|
||||
DP = -1;
|
||||
AD = null;
|
||||
PL = null;
|
||||
extendedAttributes = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new Genotype object using the values set in this builder.
|
||||
*
|
||||
* After creation the values in this builder can be modified and more Genotypes
|
||||
* created, althrough the contents of array values like PL should never be modified
|
||||
* inline as they are not copied for efficiency reasons.
|
||||
*
|
||||
* @return a newly minted Genotype object with values provided from this builder
|
||||
*/
|
||||
@Ensures({"result != null"})
|
||||
public Genotype make() {
|
||||
if ( useFast ) {
|
||||
final Map<String, Object> ea = extendedAttributes == null ? NO_ATTRIBUTES : extendedAttributes;
|
||||
return new FastGenotype(sampleName, alleles, isPhased, GQ, DP, AD, PL, ea);
|
||||
} else {
|
||||
final Map<String, Object> attributes = new LinkedHashMap<String, Object>();
|
||||
if ( extendedAttributes != null ) attributes.putAll(extendedAttributes);
|
||||
final double log10PError = GQ == -1 ? SlowGenotype.NO_LOG10_PERROR : (GQ == 0 ? 0 : GQ / -10.0);
|
||||
|
||||
Set<String> filters = null;
|
||||
if ( extendedAttributes != null && extendedAttributes.containsKey(VCFConstants.GENOTYPE_FILTER_KEY) )
|
||||
{
|
||||
final Object f = extendedAttributes.get(VCFConstants.GENOTYPE_FILTER_KEY);
|
||||
if ( f != null )
|
||||
filters = new LinkedHashSet<String>((List<String>)f);
|
||||
attributes.remove(VCFConstants.GENOTYPE_FILTER_KEY);
|
||||
}
|
||||
|
||||
if ( DP != -1 ) attributes.put(VCFConstants.DEPTH_KEY, DP);
|
||||
if ( AD != null ) attributes.put(VCFConstants.GENOTYPE_ALLELE_DEPTHS, AD);
|
||||
final double[] log10likelihoods = PL != null ? GenotypeLikelihoods.fromPLs(PL).getAsVector() : null;
|
||||
return new SlowGenotype(sampleName, alleles, log10PError, filters, attributes, isPhased, log10likelihoods);
|
||||
}
|
||||
}
|
||||
|
||||
public GenotypeBuilder useFast(boolean useFast) {
|
||||
this.useFast = useFast;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set this genotype's name
|
||||
* @param sampleName
|
||||
* @return
|
||||
*/
|
||||
@Requires({"sampleName != null"})
|
||||
@Ensures({"this.sampleName != null"})
|
||||
public GenotypeBuilder name(final String sampleName) {
|
||||
this.sampleName = sampleName;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set this genotype's alleles
|
||||
* @param alleles
|
||||
* @return
|
||||
*/
|
||||
@Ensures({"this.alleles != null"})
|
||||
public GenotypeBuilder alleles(final List<Allele> alleles) {
|
||||
if ( alleles == null )
|
||||
this.alleles = Collections.emptyList();
|
||||
else
|
||||
this.alleles = alleles;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this genotype phased?
|
||||
* @param phased
|
||||
* @return
|
||||
*/
|
||||
public GenotypeBuilder phased(final boolean phased) {
|
||||
isPhased = phased;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Requires({"GQ >= -1"})
|
||||
@Ensures({"this.GQ == GQ", "this.GQ >= -1"})
|
||||
public GenotypeBuilder GQ(final int GQ) {
|
||||
this.GQ = GQ;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adaptor interface from the pLog10Error system.
|
||||
*
|
||||
* Will be retired when
|
||||
*
|
||||
* @param pLog10Error
|
||||
* @return
|
||||
*/
|
||||
@Deprecated
|
||||
public GenotypeBuilder log10PError(final double pLog10Error) {
|
||||
if ( pLog10Error == CommonInfo.NO_LOG10_PERROR )
|
||||
return GQ(-1);
|
||||
else
|
||||
return GQ((int)Math.round(pLog10Error * -10));
|
||||
}
|
||||
|
||||
/**
|
||||
* This genotype has no GQ value
|
||||
* @return
|
||||
*/
|
||||
public GenotypeBuilder noGQ() { GQ = -1; return this; }
|
||||
|
||||
/**
|
||||
* This genotype has no AD value
|
||||
* @return
|
||||
*/
|
||||
public GenotypeBuilder noAD() { AD = null; return this; }
|
||||
|
||||
/**
|
||||
* This genotype has no DP value
|
||||
* @return
|
||||
*/
|
||||
public GenotypeBuilder noDP() { DP = -1; return this; }
|
||||
|
||||
/**
|
||||
* This genotype has no PL value
|
||||
* @return
|
||||
*/
|
||||
public GenotypeBuilder noPL() { PL = null; return this; }
|
||||
|
||||
/**
|
||||
* This genotype has this DP value
|
||||
* @return
|
||||
*/
|
||||
@Requires({"DP >= -1"})
|
||||
@Ensures({"this.DP == DP"})
|
||||
public GenotypeBuilder DP(final int DP) {
|
||||
this.DP = DP;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* This genotype has this AD value
|
||||
* @return
|
||||
*/
|
||||
@Requires({"AD == null || AD.length > 0"})
|
||||
@Ensures({"this.AD == AD"})
|
||||
public GenotypeBuilder AD(final int[] AD) {
|
||||
this.AD = AD;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* This genotype has this PL value, as int[]. FAST
|
||||
* @return
|
||||
*/
|
||||
@Requires("PL == null || PL.length > 0")
|
||||
@Ensures({"this.PL == PL"})
|
||||
public GenotypeBuilder PL(final int[] PL) {
|
||||
this.PL = PL;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* This genotype has this PL value, converted from double[]. SLOW
|
||||
* @return
|
||||
*/
|
||||
@Requires("PL == null || PL.length > 0")
|
||||
@Ensures({"this.PL == PL"})
|
||||
public GenotypeBuilder PL(final double[] GLs) {
|
||||
this.PL = GenotypeLikelihoods.fromLog10Likelihoods(GLs).getAsPLs();
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* This genotype has these attributes.
|
||||
*
|
||||
* Cannot contain inline attributes (DP, AD, GQ, PL)
|
||||
* @return
|
||||
*/
|
||||
@Requires("attributes != null")
|
||||
@Ensures("attributes.isEmpty() || extendedAttributes != null")
|
||||
public GenotypeBuilder attributes(final Map<String, Object> attributes) {
|
||||
for ( Map.Entry<String, Object> pair : attributes.entrySet() )
|
||||
attribute(pair.getKey(), pair.getValue());
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells this builder to remove all extended attributes
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public GenotypeBuilder noAttributes() {
|
||||
this.extendedAttributes = null;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* This genotype has this attribute key / value pair.
|
||||
*
|
||||
* Cannot contain inline attributes (DP, AD, GQ, PL)
|
||||
* @return
|
||||
*/
|
||||
@Requires({"key != null"})
|
||||
@Ensures({"extendedAttributes != null", "extendedAttributes.containsKey(key)"})
|
||||
public GenotypeBuilder attribute(final String key, final Object value) {
|
||||
if ( extendedAttributes == null )
|
||||
extendedAttributes = new HashMap<String, Object>(initialAttributeMapSize);
|
||||
extendedAttributes.put(key, value);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells this builder to make a Genotype object that has had filters applied,
|
||||
* which may be empty (passes) or have some value indicating the reasons
|
||||
* why it's been filtered.
|
||||
*
|
||||
* @param filters non-null list of filters. empty list => PASS
|
||||
* @return this builder
|
||||
*/
|
||||
@Requires("filters != null")
|
||||
public GenotypeBuilder filters(final List<String> filters) {
|
||||
attribute(VCFConstants.GENOTYPE_FILTER_KEY, filters);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* varargs version of #filters
|
||||
* @param filters
|
||||
* @return
|
||||
*/
|
||||
@Requires("filters != null")
|
||||
public GenotypeBuilder filters(final String ... filters) {
|
||||
return filters(Arrays.asList(filters));
|
||||
}
|
||||
|
||||
/**
|
||||
* This genotype is unfiltered
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public GenotypeBuilder unfiltered() {
|
||||
if ( extendedAttributes != null )
|
||||
extendedAttributes.remove(VCFConstants.GENOTYPE_FILTER_KEY);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tell's this builder that we have at most these number of attributes
|
||||
* @return
|
||||
*/
|
||||
public GenotypeBuilder maxAttributes(final int i) {
|
||||
initialAttributeMapSize = i;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
@ -48,6 +48,7 @@ public class GenotypeLikelihoods {
|
|||
return new GenotypeLikelihoods(PLs);
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public final static GenotypeLikelihoods fromGLField(String GLs) {
|
||||
return new GenotypeLikelihoods(parseDeprecatedGLString(GLs));
|
||||
}
|
||||
|
|
@ -122,25 +123,25 @@ public class GenotypeLikelihoods {
|
|||
|
||||
//Return genotype likelihoods as an EnumMap with Genotypes as keys and likelihoods as values
|
||||
//Returns null in case of missing likelihoods
|
||||
public EnumMap<Genotype.Type,Double> getAsMap(boolean normalizeFromLog10){
|
||||
public EnumMap<GenotypeType,Double> getAsMap(boolean normalizeFromLog10){
|
||||
//Make sure that the log10likelihoods are set
|
||||
double[] likelihoods = normalizeFromLog10 ? MathUtils.normalizeFromLog10(getAsVector()) : getAsVector();
|
||||
if(likelihoods == null)
|
||||
return null;
|
||||
EnumMap<Genotype.Type,Double> likelihoodsMap = new EnumMap<Genotype.Type, Double>(Genotype.Type.class);
|
||||
likelihoodsMap.put(Genotype.Type.HOM_REF,likelihoods[Genotype.Type.HOM_REF.ordinal()-1]);
|
||||
likelihoodsMap.put(Genotype.Type.HET,likelihoods[Genotype.Type.HET.ordinal()-1]);
|
||||
likelihoodsMap.put(Genotype.Type.HOM_VAR, likelihoods[Genotype.Type.HOM_VAR.ordinal() - 1]);
|
||||
EnumMap<GenotypeType,Double> likelihoodsMap = new EnumMap<GenotypeType, Double>(GenotypeType.class);
|
||||
likelihoodsMap.put(GenotypeType.HOM_REF,likelihoods[GenotypeType.HOM_REF.ordinal()-1]);
|
||||
likelihoodsMap.put(GenotypeType.HET,likelihoods[GenotypeType.HET.ordinal()-1]);
|
||||
likelihoodsMap.put(GenotypeType.HOM_VAR, likelihoods[GenotypeType.HOM_VAR.ordinal() - 1]);
|
||||
return likelihoodsMap;
|
||||
}
|
||||
|
||||
//Return the neg log10 Genotype Quality (GQ) for the given genotype
|
||||
//Returns Double.NEGATIVE_INFINITY in case of missing genotype
|
||||
public double getLog10GQ(Genotype.Type genotype){
|
||||
return getQualFromLikelihoods(genotype.ordinal() - 1 /* NO_CALL IS FIRST */, getAsVector());
|
||||
public double getLog10GQ(GenotypeType genotype){
|
||||
return getGQLog10FromLikelihoods(genotype.ordinal() - 1 /* NO_CALL IS FIRST */, getAsVector());
|
||||
}
|
||||
|
||||
public static double getQualFromLikelihoods(int iOfChoosenGenotype, double[] likelihoods){
|
||||
public static double getGQLog10FromLikelihoods(int iOfChoosenGenotype, double[] likelihoods){
|
||||
if(likelihoods == null)
|
||||
return Double.NEGATIVE_INFINITY;
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.variantcontext;
|
||||
|
||||
/**
 * Summary types for Genotype objects.
 *
 * The declaration order is significant and must not be changed: callers index
 * arrays by ordinal(), and likelihood lookups for HOM_REF, HET and HOM_VAR use
 * ordinal() - 1, relying on NO_CALL being the first constant.
 */
public enum GenotypeType {
    /** The sample is no-called (all alleles are NO_CALL) */
    NO_CALL,

    /** The sample is homozygous reference */
    HOM_REF,

    /** The sample is heterozygous, with at least one ref and at least one alt in any order */
    HET,

    /** All alleles are non-reference */
    HOM_VAR,

    /** There is no allele data available for this sample (alleles.isEmpty) */
    UNAVAILABLE,

    /** Some chromosomes are NO_CALL and others are called (no-call and call in the same genotype) */
    MIXED
}
|
||||
|
|
@ -272,6 +272,17 @@ public class GenotypesContext implements List<Genotype> {
|
|||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
//
|
||||
// Lazy methods
|
||||
//
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
public boolean isLazyWithData() {
|
||||
return this instanceof LazyGenotypesContext &&
|
||||
((LazyGenotypesContext)this).getUnparsedGenotypeData() != null;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
//
|
||||
// Map methods
|
||||
|
|
|
|||
|
|
@ -0,0 +1,189 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.variantcontext;
|
||||
|
||||
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* This class encompasses all the basic information about a genotype. It is immutable.
|
||||
*
|
||||
* @author Mark DePristo
|
||||
*/
|
||||
@Deprecated
|
||||
public class SlowGenotype extends Genotype {
|
||||
protected CommonInfo commonInfo;
|
||||
public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR;
|
||||
protected List<Allele> alleles = null;
|
||||
protected boolean isPhased = false;
|
||||
|
||||
protected SlowGenotype(String sampleName, List<Allele> alleles, double log10PError, Set<String> filters, Map<String, Object> attributes, boolean isPhased, double[] log10Likelihoods) {
|
||||
super(sampleName);
|
||||
|
||||
if ( alleles == null || alleles.isEmpty() )
|
||||
this.alleles = Collections.emptyList();
|
||||
else
|
||||
this.alleles = Collections.unmodifiableList(alleles);
|
||||
commonInfo = new CommonInfo(sampleName, log10PError, filters, attributes);
|
||||
if ( log10Likelihoods != null )
|
||||
commonInfo.putAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(log10Likelihoods));
|
||||
this.isPhased = isPhased;
|
||||
validate();
|
||||
}
|
||||
|
||||
@Override public List<Allele> getAlleles() {
|
||||
return alleles;
|
||||
}
|
||||
|
||||
@Override public Allele getAllele(int i) {
|
||||
if ( getType() == GenotypeType.UNAVAILABLE )
|
||||
throw new ReviewedStingException("Requesting alleles for an UNAVAILABLE genotype");
|
||||
return alleles.get(i);
|
||||
}
|
||||
|
||||
@Override public boolean isPhased() { return isPhased; }
|
||||
|
||||
//
|
||||
// Useful methods for getting genotype likelihoods for a genotype object, if present
|
||||
//
|
||||
@Override public boolean hasLikelihoods() {
|
||||
return (commonInfo.hasAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) && !commonInfo.getAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY).equals(VCFConstants.MISSING_VALUE_v4)) ||
|
||||
(commonInfo.hasAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) && !commonInfo.getAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY).equals(VCFConstants.MISSING_VALUE_v4));
|
||||
}
|
||||
|
||||
@Override public GenotypeLikelihoods getLikelihoods() {
|
||||
GenotypeLikelihoods x = getLikelihoods(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, true);
|
||||
if ( x != null )
|
||||
return x;
|
||||
else {
|
||||
x = getLikelihoods(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, false);
|
||||
return x;
|
||||
}
|
||||
}
|
||||
|
||||
private GenotypeLikelihoods getLikelihoods(String key, boolean asPL) {
|
||||
Object x = commonInfo.getAttribute(key);
|
||||
if ( x instanceof String ) {
|
||||
if ( asPL )
|
||||
return GenotypeLikelihoods.fromPLField((String)x);
|
||||
else
|
||||
return GenotypeLikelihoods.fromGLField((String)x);
|
||||
}
|
||||
else if ( x instanceof GenotypeLikelihoods ) return (GenotypeLikelihoods)x;
|
||||
else return null;
|
||||
}
|
||||
|
||||
private final void validate() {
|
||||
if ( alleles.size() == 0) return;
|
||||
|
||||
for ( Allele allele : alleles ) {
|
||||
if ( allele == null )
|
||||
throw new IllegalArgumentException("BUG: allele cannot be null in Genotype");
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// get routines to access context info fields
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
@Override public List<String> getFilters() { return new ArrayList<String>(commonInfo.getFilters()); }
|
||||
@Override public boolean filtersWereApplied() { return commonInfo.filtersWereApplied(); }
|
||||
@Override public boolean hasLog10PError() { return commonInfo.hasLog10PError(); }
|
||||
@Override public double getLog10PError() { return commonInfo.getLog10PError(); }
|
||||
|
||||
@Override
|
||||
public boolean hasExtendedAttribute(String key) { return commonInfo.hasAttribute(key); }
|
||||
|
||||
@Override
|
||||
public Object getExtendedAttribute(String key) { return commonInfo.getAttribute(key); }
|
||||
|
||||
@Override
|
||||
public Object getExtendedAttribute(String key, Object defaultValue) {
|
||||
return commonInfo.getAttribute(key, defaultValue);
|
||||
}
|
||||
|
||||
// public String getAttributeAsString(String key, String defaultValue) { return commonInfo.getAttributeAsString(key, defaultValue); }
|
||||
// public int getAttributeAsInt(String key, int defaultValue) { return commonInfo.getAttributeAsInt(key, defaultValue); }
|
||||
// public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); }
|
||||
// public boolean getAttributeAsBoolean(String key, boolean defaultValue) { return commonInfo.getAttributeAsBoolean(key, defaultValue); }
|
||||
|
||||
@Override
|
||||
public int[] getPL() {
|
||||
return hasPL() ? getLikelihoods().getAsPLs() : null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasPL() {
|
||||
return hasLikelihoods();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getDP() {
|
||||
return commonInfo.getAttributeAsInt(VCFConstants.DEPTH_KEY, -1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasDP() {
|
||||
return commonInfo.hasAttribute(VCFConstants.DEPTH_KEY);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] getAD() {
|
||||
if ( hasAD() ) {
|
||||
return (int[])commonInfo.getAttribute(VCFConstants.GENOTYPE_ALLELE_DEPTHS);
|
||||
} else
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasAD() {
|
||||
return commonInfo.hasAttribute(VCFConstants.GENOTYPE_ALLELE_DEPTHS);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getGQ() {
|
||||
if ( commonInfo.hasLog10PError() )
|
||||
return (int)Math.round(commonInfo.getPhredScaledQual());
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasGQ() {
|
||||
return hasLog10PError();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, Object> getExtendedAttributes() {
|
||||
final Map<String, Object> ea = new LinkedHashMap<String, Object>(commonInfo.getAttributes());
|
||||
for ( final String primary : FastGenotype.PRIMARY_KEYS )
|
||||
ea.remove(primary);
|
||||
return ea;
|
||||
}
|
||||
}
|
||||
|
|
@ -327,19 +327,36 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
public VariantContext subContextFromSamples(Set<String> sampleNames, Collection<Allele> alleles) {
|
||||
VariantContextBuilder builder = new VariantContextBuilder(this);
|
||||
return builder.genotypes(genotypes.subsetToSamples(sampleNames)).alleles(alleles).make();
|
||||
}
|
||||
/**
|
||||
* This method subsets down to a set of samples.
|
||||
*
|
||||
* At the same time returns the alleles to just those in use by the samples,
|
||||
* if rederiveAllelesFromGenotypes is true, otherwise the full set of alleles
|
||||
* in this VC is returned as the set of alleles in the subContext, even if
|
||||
* some of those alleles aren't in the samples
|
||||
*
|
||||
* @param sampleNames
|
||||
* @return
|
||||
*/
|
||||
public VariantContext subContextFromSamples(Set<String> sampleNames, final boolean rederiveAllelesFromGenotypes ) {
|
||||
if ( ! rederiveAllelesFromGenotypes && sampleNames.containsAll(getSampleNames()) ) {
|
||||
return this; // fast path when you don't have any work to do
|
||||
} else {
|
||||
VariantContextBuilder builder = new VariantContextBuilder(this);
|
||||
GenotypesContext newGenotypes = genotypes.subsetToSamples(sampleNames);
|
||||
|
||||
public VariantContext subContextFromSamples(Set<String> sampleNames) {
|
||||
VariantContextBuilder builder = new VariantContextBuilder(this);
|
||||
GenotypesContext newGenotypes = genotypes.subsetToSamples(sampleNames);
|
||||
return builder.genotypes(newGenotypes).alleles(allelesOfGenotypes(newGenotypes)).make();
|
||||
if ( rederiveAllelesFromGenotypes )
|
||||
builder.alleles(allelesOfGenotypes(newGenotypes));
|
||||
else {
|
||||
builder.alleles(alleles);
|
||||
}
|
||||
|
||||
return builder.genotypes(newGenotypes).make();
|
||||
}
|
||||
}
|
||||
|
||||
public VariantContext subContextFromSample(String sampleName) {
|
||||
return subContextFromSamples(Collections.singleton(sampleName));
|
||||
return subContextFromSamples(Collections.singleton(sampleName), true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -849,7 +866,8 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
* @return chromosome count
|
||||
*/
|
||||
public int getCalledChrCount() {
|
||||
return getCalledChrCount(new HashSet<String>(0));
|
||||
final Set<String> noSamples = Collections.emptySet();
|
||||
return getCalledChrCount(noSamples);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -892,7 +910,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
GenotypesContext genotypes = sampleIds.isEmpty() ? getGenotypes() : getGenotypes(sampleIds);
|
||||
|
||||
for ( final Genotype g : genotypes ) {
|
||||
n += g.getAlleles(a).size();
|
||||
n += g.countAllele(a);
|
||||
}
|
||||
|
||||
return n;
|
||||
|
|
@ -922,7 +940,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
|
||||
private void calculateGenotypeCounts() {
|
||||
if ( genotypeCounts == null ) {
|
||||
genotypeCounts = new int[Genotype.Type.values().length];
|
||||
genotypeCounts = new int[GenotypeType.values().length];
|
||||
|
||||
for ( final Genotype g : getGenotypes() ) {
|
||||
genotypeCounts[g.getType().ordinal()]++;
|
||||
|
|
@ -937,7 +955,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
*/
|
||||
public int getNoCallCount() {
|
||||
calculateGenotypeCounts();
|
||||
return genotypeCounts[Genotype.Type.NO_CALL.ordinal()];
|
||||
return genotypeCounts[GenotypeType.NO_CALL.ordinal()];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -947,7 +965,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
*/
|
||||
public int getHomRefCount() {
|
||||
calculateGenotypeCounts();
|
||||
return genotypeCounts[Genotype.Type.HOM_REF.ordinal()];
|
||||
return genotypeCounts[GenotypeType.HOM_REF.ordinal()];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -957,7 +975,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
*/
|
||||
public int getHetCount() {
|
||||
calculateGenotypeCounts();
|
||||
return genotypeCounts[Genotype.Type.HET.ordinal()];
|
||||
return genotypeCounts[GenotypeType.HET.ordinal()];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -967,7 +985,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
*/
|
||||
public int getHomVarCount() {
|
||||
calculateGenotypeCounts();
|
||||
return genotypeCounts[Genotype.Type.HOM_VAR.ordinal()];
|
||||
return genotypeCounts[GenotypeType.HOM_VAR.ordinal()];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -977,7 +995,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
*/
|
||||
public int getMixedCount() {
|
||||
calculateGenotypeCounts();
|
||||
return genotypeCounts[Genotype.Type.MIXED.ordinal()];
|
||||
return genotypeCounts[GenotypeType.MIXED.ordinal()];
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -1412,8 +1430,8 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
}
|
||||
|
||||
private final Genotype fullyDecodeGenotypes(final Genotype g, final VCFHeader header) {
|
||||
final Map<String, Object> map = fullyDecodeAttributes(g.getAttributes(), header);
|
||||
return new Genotype(g.getSampleName(), g.getAlleles(), g.getLog10PError(), g.getFilters(), map, g.isPhased());
|
||||
final Map<String, Object> map = fullyDecodeAttributes(g.getExtendedAttributes(), header);
|
||||
return new GenotypeBuilder(g).attributes(map).make();
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -99,7 +99,7 @@ public class VariantContextUtils {
|
|||
|
||||
// if there are alternate alleles, record the relevant tags
|
||||
if ( vc.getAlternateAlleles().size() > 0 ) {
|
||||
ArrayList<String> alleleFreqs = new ArrayList<String>();
|
||||
ArrayList<Double> alleleFreqs = new ArrayList<Double>();
|
||||
ArrayList<Integer> alleleCounts = new ArrayList<Integer>();
|
||||
ArrayList<Integer> foundersAlleleCounts = new ArrayList<Integer>();
|
||||
double totalFoundersChromosomes = (double)vc.getCalledChrCount(founderIds);
|
||||
|
|
@ -109,10 +109,9 @@ public class VariantContextUtils {
|
|||
alleleCounts.add(vc.getCalledChrCount(allele));
|
||||
foundersAlleleCounts.add(foundersAltChromosomes);
|
||||
if ( AN == 0 ) {
|
||||
alleleFreqs.add("0.0");
|
||||
alleleFreqs.add(0.0);
|
||||
} else {
|
||||
// todo -- this is a performance problem
|
||||
final String freq = String.format(makePrecisionFormatStringFromDenominatorValue(totalFoundersChromosomes), ((double)foundersAltChromosomes / totalFoundersChromosomes));
|
||||
final Double freq = (double)foundersAltChromosomes / totalFoundersChromosomes;
|
||||
alleleFreqs.add(freq);
|
||||
}
|
||||
}
|
||||
|
|
@ -155,22 +154,11 @@ public class VariantContextUtils {
|
|||
builder.attributes(calculateChromosomeCounts(vc, new HashMap<String, Object>(vc.getAttributes()), removeStaleValues, founderIds));
|
||||
}
|
||||
|
||||
public static String makePrecisionFormatStringFromDenominatorValue(double maxValue) {
|
||||
int precision = 1;
|
||||
|
||||
while ( maxValue > 1 ) {
|
||||
precision++;
|
||||
maxValue /= 10.0;
|
||||
}
|
||||
|
||||
return "%." + precision + "f";
|
||||
}
|
||||
|
||||
public static Genotype removePLs(Genotype g) {
|
||||
Map<String, Object> attrs = new HashMap<String, Object>(g.getAttributes());
|
||||
attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
|
||||
attrs.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY);
|
||||
return new Genotype(g.getSampleName(), g.getAlleles(), g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attrs, g.isPhased());
|
||||
if ( g.hasLikelihoods() )
|
||||
return new GenotypeBuilder(g).noPL().make();
|
||||
else
|
||||
return g;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -257,8 +245,7 @@ public class VariantContextUtils {
|
|||
newGenotypeAlleles.add(Allele.NO_CALL);
|
||||
}
|
||||
}
|
||||
genotypes.add(new Genotype(g.getSampleName(), newGenotypeAlleles, g.getLog10PError(),
|
||||
g.getFilters(), g.getAttributes(), g.isPhased()));
|
||||
genotypes.add(new GenotypeBuilder(g).alleles(newGenotypeAlleles).make());
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -475,9 +462,10 @@ public class VariantContextUtils {
|
|||
// Genotypes
|
||||
final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples());
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
Map<String, Object> genotypeAttributes = subsetAttributes(g.commonInfo, keysToPreserve);
|
||||
genotypes.add(new Genotype(g.getSampleName(), g.getAlleles(), g.getLog10PError(), g.getFilters(),
|
||||
genotypeAttributes, g.isPhased()));
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(g);
|
||||
// remove AD, DP, PL, and all extended attributes, keeping just GT and GQ
|
||||
gb.noAD().noDP().noPL().noAttributes();
|
||||
genotypes.add(gb.make());
|
||||
}
|
||||
|
||||
return builder.genotypes(genotypes).attributes(attributes);
|
||||
|
|
@ -833,7 +821,7 @@ public class VariantContextUtils {
|
|||
else
|
||||
trimmedAlleles.add(Allele.NO_CALL);
|
||||
}
|
||||
genotypes.add(Genotype.modifyAlleles(genotype, trimmedAlleles));
|
||||
genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -878,7 +866,7 @@ public class VariantContextUtils {
|
|||
else
|
||||
trimmedAlleles.add(Allele.NO_CALL);
|
||||
}
|
||||
genotypes.add(Genotype.modifyAlleles(genotype, trimmedAlleles));
|
||||
genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
|
||||
}
|
||||
|
||||
return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() + (inputVC.isMixed() ? -1 : 0)).alleles(alleles).genotypes(genotypes).make();
|
||||
|
|
@ -1073,7 +1061,7 @@ public class VariantContextUtils {
|
|||
|
||||
if ( uniqifySamples || alleleMapping.needsRemapping() ) {
|
||||
final List<Allele> alleles = alleleMapping.needsRemapping() ? alleleMapping.remap(g.getAlleles()) : g.getAlleles();
|
||||
newG = new Genotype(name, alleles, g.getLog10PError(), g.getFilters(), g.getAttributes(), g.isPhased());
|
||||
newG = new GenotypeBuilder(g).name(name).alleles(alleles).make();
|
||||
}
|
||||
|
||||
mergedGenotypes.add(newG);
|
||||
|
|
@ -1113,7 +1101,7 @@ public class VariantContextUtils {
|
|||
newAllele = Allele.NO_CALL;
|
||||
newAlleles.add(newAllele);
|
||||
}
|
||||
newGenotypes.add(Genotype.modifyAlleles(genotype, newAlleles));
|
||||
newGenotypes.add(new GenotypeBuilder(genotype).alleles(newAlleles).make());
|
||||
}
|
||||
|
||||
return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).make();
|
||||
|
|
@ -1126,11 +1114,11 @@ public class VariantContextUtils {
|
|||
GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples());
|
||||
for ( final Genotype genotype : vc.getGenotypes() ) {
|
||||
Map<String, Object> attrs = new HashMap<String, Object>();
|
||||
for ( Map.Entry<String, Object> attr : genotype.getAttributes().entrySet() ) {
|
||||
for ( Map.Entry<String, Object> attr : genotype.getExtendedAttributes().entrySet() ) {
|
||||
if ( allowedAttributes.contains(attr.getKey()) )
|
||||
attrs.put(attr.getKey(), attr.getValue());
|
||||
}
|
||||
newGenotypes.add(Genotype.modifyAttributes(genotype, attrs));
|
||||
newGenotypes.add(new GenotypeBuilder(genotype).attributes(attrs).make());
|
||||
}
|
||||
|
||||
return new VariantContextBuilder(vc).genotypes(newGenotypes).make();
|
||||
|
|
@ -1247,7 +1235,7 @@ public class VariantContextUtils {
|
|||
for ( int k = 0; k < oldGTs.size(); k++ ) {
|
||||
final Genotype g = oldGTs.get(sampleIndices.get(k));
|
||||
if ( !g.hasLikelihoods() ) {
|
||||
newGTs.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, null, false));
|
||||
newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -1268,51 +1256,35 @@ public class VariantContextUtils {
|
|||
|
||||
// if there is no mass on the (new) likelihoods, then just no-call the sample
|
||||
if ( MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
|
||||
newGTs.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, null, false));
|
||||
newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
|
||||
}
|
||||
else {
|
||||
Map<String, Object> attrs = new HashMap<String, Object>(g.getAttributes());
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(g);
|
||||
|
||||
if ( numNewAltAlleles == 0 )
|
||||
attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
|
||||
gb.noPL();
|
||||
else
|
||||
attrs.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(newLikelihoods));
|
||||
gb.PL(newLikelihoods);
|
||||
|
||||
// if we weren't asked to assign a genotype, then just no-call the sample
|
||||
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL )
|
||||
newGTs.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, attrs, false));
|
||||
else
|
||||
newGTs.add(assignDiploidGenotype(g, newLikelihoods, allelesToUse, attrs));
|
||||
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
|
||||
gb.alleles(NO_CALL_ALLELES);
|
||||
}
|
||||
else {
|
||||
// find the genotype with maximum likelihoods
|
||||
int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
|
||||
GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
|
||||
|
||||
gb.alleles(Arrays.asList(allelesToUse.get(alleles.alleleIndex1), allelesToUse.get(alleles.alleleIndex2)));
|
||||
if ( numNewAltAlleles != 0 ) gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods));
|
||||
}
|
||||
newGTs.add(gb.make());
|
||||
}
|
||||
}
|
||||
|
||||
return newGTs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Assign genotypes (GTs) to the samples in the Variant Context greedily based on the PLs
|
||||
*
|
||||
* @param originalGT the original genotype
|
||||
* @param newLikelihoods the PL array
|
||||
* @param allelesToUse the list of alleles to choose from (corresponding to the PLs)
|
||||
* @param attrs the annotations to use when creating the genotype
|
||||
*
|
||||
* @return genotype
|
||||
*/
|
||||
private static Genotype assignDiploidGenotype(final Genotype originalGT, final double[] newLikelihoods, final List<Allele> allelesToUse, final Map<String, Object> attrs) {
|
||||
final int numNewAltAlleles = allelesToUse.size() - 1;
|
||||
|
||||
// find the genotype with maximum likelihoods
|
||||
int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
|
||||
GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
|
||||
|
||||
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
|
||||
myAlleles.add(allelesToUse.get(alleles.alleleIndex1));
|
||||
myAlleles.add(allelesToUse.get(alleles.alleleIndex2));
|
||||
|
||||
final double qual = numNewAltAlleles == 0 ? Genotype.NO_LOG10_PERROR : GenotypeLikelihoods.getQualFromLikelihoods(PLindex, newLikelihoods);
|
||||
return new Genotype(originalGT.getSampleName(), myAlleles, qual, null, attrs, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true iff VC is a non-complex indel where every allele represents an expansion or
|
||||
* contraction of a series of identical bases in the reference.
|
||||
|
|
|
|||
|
|
@ -192,8 +192,8 @@ class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
|
|||
infoMap.put("isHomRef", g.isHomRef() ? "1" : "0");
|
||||
infoMap.put("isHet", g.isHet() ? "1" : "0");
|
||||
infoMap.put("isHomVar", g.isHomVar() ? "1" : "0");
|
||||
infoMap.put(VCFConstants.GENOTYPE_QUALITY_KEY, g.getPhredScaledQual());
|
||||
for ( Map.Entry<String, Object> e : g.getAttributes().entrySet() ) {
|
||||
infoMap.put(VCFConstants.GENOTYPE_QUALITY_KEY, g.getGQ());
|
||||
for ( Map.Entry<String, Object> e : g.getExtendedAttributes().entrySet() ) {
|
||||
if ( e.getValue() != null && !e.getValue().equals(VCFConstants.MISSING_VALUE_v4) )
|
||||
infoMap.put(e.getKey(), e.getValue());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,22 +22,25 @@
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
package org.broadinstitute.sting.utils.variantcontext.writer;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Simple BCF2 encoder
|
||||
* See #BCFWriter for documentation on this class's role in encoding BCF2 files
|
||||
*
|
||||
* @author depristo
|
||||
* @since 5/12
|
||||
* @author Mark DePristo
|
||||
* @since 06/12
|
||||
*/
|
||||
public class BCF2Encoder {
|
||||
public final class BCF2Encoder {
|
||||
// TODO -- increase default size?
|
||||
public static final int WRITE_BUFFER_INITIAL_SIZE = 16384;
|
||||
private ByteArrayOutputStream encodeStream = new ByteArrayOutputStream(WRITE_BUFFER_INITIAL_SIZE);
|
||||
|
|
@ -48,10 +51,7 @@ public class BCF2Encoder {
|
|||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
public int getRecordSizeInBytes() {
|
||||
return encodeStream.size();
|
||||
}
|
||||
|
||||
@Ensures("result != null")
|
||||
public byte[] getRecordBytes() {
|
||||
byte[] bytes = encodeStream.toByteArray();
|
||||
encodeStream.reset();
|
||||
|
|
@ -64,18 +64,67 @@ public class BCF2Encoder {
|
|||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
@Ensures("encodeStream.size() > old(encodeStream.size())")
|
||||
public final void encodeTypedMissing(final BCF2Type type) throws IOException {
|
||||
encodeTyped(Collections.emptyList(), type);
|
||||
encodeType(0, type);
|
||||
}
|
||||
|
||||
// todo -- should be specialized for each object type for efficiency
|
||||
public final void encodeTyped(final Object v, final BCF2Type type) throws IOException {
|
||||
encodeTyped(Collections.singletonList(v), type);
|
||||
@Ensures("encodeStream.size() > old(encodeStream.size())")
|
||||
public final void encodeTyped(final Object value, final BCF2Type type) throws IOException {
|
||||
if ( value == null )
|
||||
encodeTypedMissing(type);
|
||||
else {
|
||||
switch ( type ) {
|
||||
case INT8:
|
||||
case INT16:
|
||||
case INT32: encodeTypedInt((Integer)value, type); break;
|
||||
case FLOAT: encodeTypedFloat((Double) value); break;
|
||||
case CHAR: encodeTypedString((String) value); break;
|
||||
default: throw new ReviewedStingException("Illegal type encountered " + type);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Ensures("encodeStream.size() > old(encodeStream.size())")
|
||||
public final void encodeTypedInt(final int v) throws IOException {
|
||||
final BCF2Type type = BCF2Utils.determineIntegerType(v);
|
||||
encodeTypedInt(v, type);
|
||||
}
|
||||
|
||||
@Requires("type.isIntegerType()")
|
||||
@Ensures("encodeStream.size() > old(encodeStream.size())")
|
||||
public final void encodeTypedInt(final int v, final BCF2Type type) throws IOException {
|
||||
encodeType(1, type);
|
||||
encodeRawInt(v, type);
|
||||
}
|
||||
|
||||
@Ensures("encodeStream.size() > old(encodeStream.size())")
|
||||
public final void encodeTypedString(final String s) throws IOException {
|
||||
encodeTypedString(s.getBytes());
|
||||
}
|
||||
|
||||
@Ensures("encodeStream.size() > old(encodeStream.size())")
|
||||
public final void encodeTypedString(final byte[] s) throws IOException {
|
||||
if ( s == null )
|
||||
encodeType(0, BCF2Type.CHAR);
|
||||
else {
|
||||
encodeType(s.length, BCF2Type.CHAR);
|
||||
for ( int i = 0; i < s.length; i++ ) {
|
||||
encodeRawChar(s[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Ensures("encodeStream.size() > old(encodeStream.size())")
|
||||
public final void encodeTypedFloat(final double d) throws IOException {
|
||||
encodeType(1, BCF2Type.FLOAT);
|
||||
encodeRawFloat(d);
|
||||
}
|
||||
|
||||
@Ensures("encodeStream.size() > old(encodeStream.size())")
|
||||
public final void encodeTyped(List<? extends Object> v, final BCF2Type type) throws IOException {
|
||||
if ( type == BCF2Type.CHAR && v.size() != 0 ) {
|
||||
final String s = v.size() > 1 ? BCF2Utils.collapseStringList((List<String>)v) : (String)v.get(0);
|
||||
final String s = v.size() > 1 ? BCF2Utils.collapseStringList((List<String>) v) : (String)v.get(0);
|
||||
v = stringToBytes(s);
|
||||
}
|
||||
|
||||
|
|
@ -103,7 +152,7 @@ public class BCF2Encoder {
|
|||
switch (type) {
|
||||
case INT8:
|
||||
case INT16:
|
||||
case INT32: encodePrimitive((Integer)value, type); break;
|
||||
case INT32: encodeRawBytes((Integer) value, type); break;
|
||||
case FLOAT: encodeRawFloat((Double) value); break;
|
||||
case CHAR: encodeRawChar((Byte) value); break;
|
||||
default: throw new ReviewedStingException("Illegal type encountered " + type);
|
||||
|
|
@ -114,13 +163,13 @@ public class BCF2Encoder {
|
|||
}
|
||||
}
|
||||
|
||||
@Ensures("encodeStream.size() > old(encodeStream.size())")
|
||||
public final void encodeRawMissingValue(final BCF2Type type) throws IOException {
|
||||
encodePrimitive(type.getMissingBytes(), type);
|
||||
encodeRawBytes(type.getMissingBytes(), type);
|
||||
}
|
||||
|
||||
@Requires("size >= 0")
|
||||
public final void encodeRawMissingValues(final int size, final BCF2Type type) throws IOException {
|
||||
if ( size <= 0 ) throw new ReviewedStingException("BUG: size <= 0");
|
||||
|
||||
for ( int i = 0; i < size; i++ )
|
||||
encodeRawMissingValue(type);
|
||||
}
|
||||
|
|
@ -136,26 +185,28 @@ public class BCF2Encoder {
|
|||
}
|
||||
|
||||
public final void encodeRawFloat(final double value) throws IOException {
|
||||
encodePrimitive(Float.floatToIntBits((float)value), BCF2Type.FLOAT);
|
||||
encodeRawBytes(Float.floatToIntBits((float) value), BCF2Type.FLOAT);
|
||||
}
|
||||
|
||||
@Requires("size >= 0")
|
||||
@Ensures("encodeStream.size() > old(encodeStream.size())")
|
||||
public final void encodeType(final int size, final BCF2Type type) throws IOException {
|
||||
if ( size < 0 ) throw new ReviewedStingException("BUG: size < 0");
|
||||
|
||||
final byte typeByte = BCF2Utils.encodeTypeDescriptor(size, type);
|
||||
encodeStream.write(typeByte);
|
||||
if ( BCF2Utils.willOverflow(size) ) {
|
||||
// write in the overflow size
|
||||
encodeTyped(size, determineIntegerType(size));
|
||||
encodeTypedInt(size);
|
||||
}
|
||||
}
|
||||
|
||||
@Ensures("encodeStream.size() > old(encodeStream.size())")
|
||||
public final void encodeRawInt(final int value, final BCF2Type type) throws IOException {
|
||||
encodePrimitive(value, type, encodeStream);
|
||||
BCF2Utils.encodeRawBytes(value, type, encodeStream);
|
||||
}
|
||||
|
||||
public final void encodePrimitive(final int value, final BCF2Type type) throws IOException {
|
||||
encodePrimitive(value, type, encodeStream);
|
||||
@Ensures("encodeStream.size() > old(encodeStream.size())")
|
||||
public final void encodeRawBytes(final int value, final BCF2Type type) throws IOException {
|
||||
BCF2Utils.encodeRawBytes(value, type, encodeStream);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
|
|
@ -164,42 +215,14 @@ public class BCF2Encoder {
|
|||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
public final BCF2Type determineIntegerType(final int[] values) {
|
||||
// literally a copy of the code below, but there's no general way to unify lists and arrays in java
|
||||
BCF2Type maxType = BCF2Type.INT8;
|
||||
for ( final int value : values ) {
|
||||
final BCF2Type type1 = determineIntegerType(value);
|
||||
switch ( type1 ) {
|
||||
case INT8: break;
|
||||
case INT16: maxType = BCF2Type.INT16; break;
|
||||
case INT32: return BCF2Type.INT32; // fast path for largest possible value
|
||||
default: throw new ReviewedStingException("Unexpected integer type " + type1 );
|
||||
}
|
||||
}
|
||||
return maxType;
|
||||
}
|
||||
|
||||
public final BCF2Type determineIntegerType(final List<Integer> values) {
|
||||
BCF2Type maxType = BCF2Type.INT8;
|
||||
for ( final int value : values ) {
|
||||
final BCF2Type type1 = determineIntegerType(value);
|
||||
switch ( type1 ) {
|
||||
case INT8: break;
|
||||
case INT16: maxType = BCF2Type.INT16; break;
|
||||
case INT32: return BCF2Type.INT32; // fast path for largest possible value
|
||||
default: throw new ReviewedStingException("Unexpected integer type " + type1 );
|
||||
}
|
||||
}
|
||||
return maxType;
|
||||
}
|
||||
|
||||
public final BCF2Type determineIntegerType(final int value) {
|
||||
for ( final BCF2Type potentialType : BCF2Utils.INTEGER_TYPES_BY_SIZE ) {
|
||||
if ( potentialType.withinRange(value) )
|
||||
return potentialType;
|
||||
}
|
||||
|
||||
throw new ReviewedStingException("Integer cannot be encoded in allowable range of even INT32: " + value);
|
||||
@Requires({"s != null", "sizeToWrite >= 0"})
|
||||
public void encodeRawString(final String s, final int sizeToWrite) throws IOException {
|
||||
final byte[] bytes = s.getBytes();
|
||||
for ( int i = 0; i < sizeToWrite; i++ )
|
||||
if ( i < bytes.length )
|
||||
encodeRawChar(bytes[i]);
|
||||
else
|
||||
encodeRawMissingValue(BCF2Type.CHAR);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -210,7 +233,8 @@ public class BCF2Encoder {
|
|||
* @param o
|
||||
* @return
|
||||
*/
|
||||
protected final BCF2Type encode(final Object o) throws IOException {
|
||||
@Requires("o != null")
|
||||
public final BCF2Type encode(final Object o) throws IOException {
|
||||
if ( o == null ) throw new ReviewedStingException("Generic encode cannot deal with null values");
|
||||
|
||||
if ( o instanceof List ) {
|
||||
|
|
@ -224,11 +248,12 @@ public class BCF2Encoder {
|
|||
}
|
||||
}
|
||||
|
||||
@Requires("arg != null")
|
||||
private final BCF2Type determineBCFType(final Object arg) {
|
||||
final Object toType = arg instanceof List ? ((List)arg).get(0) : arg;
|
||||
|
||||
if ( toType instanceof Integer )
|
||||
return determineIntegerType((Integer)toType);
|
||||
return BCF2Utils.determineIntegerType((Integer) toType);
|
||||
else if ( toType instanceof String )
|
||||
return BCF2Type.CHAR;
|
||||
else if ( toType instanceof Double )
|
||||
|
|
@ -237,15 +262,6 @@ public class BCF2Encoder {
|
|||
throw new ReviewedStingException("No native encoding for Object of type " + arg.getClass().getSimpleName());
|
||||
}
|
||||
|
||||
public final static void encodePrimitive(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException {
|
||||
for ( int i = type.getSizeInBytes() - 1; i >= 0; i-- ) {
|
||||
final int shift = i * 8;
|
||||
int mask = 0xFF << shift;
|
||||
int byteValue = (mask & value) >> shift;
|
||||
encodeStream.write(byteValue);
|
||||
}
|
||||
}
|
||||
|
||||
private final List<Byte> stringToBytes(final String v) throws IOException {
|
||||
if ( v == null || v.equals("") )
|
||||
return Collections.emptyList();
|
||||
|
|
@ -0,0 +1,529 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.variantcontext.writer;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFCompoundHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* See #BCFWriter for documentation on this class's role in encoding BCF2 files
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 06/12
|
||||
*/
|
||||
@Invariant({
|
||||
"headerLine != null",
|
||||
"BCF2Type.INTEGERS.contains(dictionaryOffsetType)",
|
||||
"dictionaryOffset >= 0"
|
||||
})
|
||||
public abstract class BCF2FieldEncoder {
|
||||
/**
|
||||
* The header line describing the field we will encode values of
|
||||
*/
|
||||
final VCFCompoundHeaderLine headerLine;
|
||||
|
||||
/**
|
||||
* The BCF2 type we'll use to encode this field, if it can be determined statically.
|
||||
* If not, this variable must be null
|
||||
*/
|
||||
final BCF2Type staticType;
|
||||
|
||||
/**
|
||||
* The integer offset into the strings map of the BCF2 file corresponding to this
|
||||
* field.
|
||||
*/
|
||||
final int dictionaryOffset;
|
||||
|
||||
/**
|
||||
* The integer type we use to encode our dictionary offset in the BCF2 file
|
||||
*/
|
||||
final BCF2Type dictionaryOffsetType;
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Constructor
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
@Requires({"headerLine != null", "dict != null"})
|
||||
private BCF2FieldEncoder(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict, final BCF2Type staticType) {
|
||||
this.headerLine = headerLine;
|
||||
this.staticType = staticType;
|
||||
|
||||
final Integer offset = dict.get(getField());
|
||||
if ( offset == null ) throw new ReviewedStingException("Format error: could not find string " + getField() + " in header as required by BCF");
|
||||
this.dictionaryOffset = offset;
|
||||
dictionaryOffsetType = BCF2Utils.determineIntegerType(offset);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Basic accessors
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
@Ensures("result != null")
|
||||
public final String getField() { return headerLine.getID(); }
|
||||
|
||||
/**
|
||||
* Write the field key (dictionary offset and type) into the BCF2Encoder stream
|
||||
*
|
||||
* @param encoder where we write our dictionary offset
|
||||
* @throws IOException
|
||||
*/
|
||||
@Requires("encoder != null")
|
||||
public final void writeFieldKey(final BCF2Encoder encoder) throws IOException {
|
||||
encoder.encodeTypedInt(dictionaryOffset, dictionaryOffsetType);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "BCF2FieldEncoder for " + getField() + " with count " + getCountType() + " encoded with " + getClass().getSimpleName();
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// methods to determine the number of encoded elements
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
@Ensures("result != null")
|
||||
protected final VCFHeaderLineCount getCountType() {
|
||||
return headerLine.getCountType();
|
||||
}
|
||||
|
||||
/**
|
||||
* True if this field has a constant, fixed number of elements (such as 1 for an atomic integer)
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result != (hasValueDeterminedNumElements() || hasContextDeterminedNumElements())")
|
||||
public boolean hasConstantNumElements() {
|
||||
return getCountType() == VCFHeaderLineCount.INTEGER;
|
||||
}
|
||||
|
||||
/**
|
||||
* True if the only way to determine how many elements this field contains is by
|
||||
* inspecting the actual value directly, such as when the number of elements
|
||||
* is a variable length list per site or per genotype.
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result != (hasConstantNumElements() || hasContextDeterminedNumElements())")
|
||||
public boolean hasValueDeterminedNumElements() {
|
||||
return getCountType() == VCFHeaderLineCount.UNBOUNDED;
|
||||
}
|
||||
|
||||
/**
|
||||
* True if this field has a non-fixed number of elements that depends only on the properties
|
||||
* of the current VariantContext, such as one value per Allele or per genotype configuration.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result != (hasValueDeterminedNumElements() || hasConstantNumElements())")
|
||||
public boolean hasContextDeterminedNumElements() {
|
||||
return ! hasConstantNumElements() && ! hasValueDeterminedNumElements();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of elements, assuming this field has a constant number of elements.
|
||||
* @return
|
||||
*/
|
||||
@Requires("hasConstantNumElements()")
|
||||
@Ensures("result >= 0")
|
||||
public int numElements() {
|
||||
return headerLine.getCount();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of elements by looking at the actual value provided
|
||||
* @return
|
||||
*/
|
||||
@Requires("hasValueDeterminedNumElements()")
|
||||
@Ensures("result >= 0")
|
||||
public int numElements(final Object value) {
|
||||
return numElementsFromValue(value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of elements, assuming this field has context-determined number of elements.
|
||||
* @return
|
||||
*/
|
||||
@Requires("hasContextDeterminedNumElements()")
|
||||
@Ensures("result >= 0")
|
||||
public int numElements(final VariantContext vc) {
|
||||
return headerLine.getCount(vc.getNAlleles() - 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* A convenience accessor for the number of elements, returning
|
||||
* the number of encoded elements, either from the fixed number
|
||||
* it has, from the VC, or from the value itself.
|
||||
* @param vc
|
||||
* @param value
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result >= 0")
|
||||
public final int numElements(final VariantContext vc, final Object value) {
|
||||
if ( hasConstantNumElements() ) return numElements();
|
||||
else if ( hasContextDeterminedNumElements() ) return numElements(vc);
|
||||
else return numElements(value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a value, return the number of elements we will encode for it.
|
||||
*
|
||||
* Assumes the value is encoded as a List
|
||||
*
|
||||
* @param value
|
||||
* @return
|
||||
*/
|
||||
@Requires("hasValueDeterminedNumElements()")
|
||||
@Ensures("result >= 0")
|
||||
protected int numElementsFromValue(final Object value) {
|
||||
if ( value == null ) return 0;
|
||||
else if ( value instanceof List ) return ((List) value).size();
|
||||
else return 1;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// methods to determine the BCF2 type of the encoded values
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Is the BCF2 type of this field static, or does it have to be determined from
|
||||
* the actual field value itself?
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result || isDynamicallyTyped()")
|
||||
public final boolean isStaticallyTyped() { return ! isDynamicallyTyped(); }
|
||||
|
||||
/**
|
||||
* Is the BCF2 type of this field dynamic, i.e., does it have to be determined from
|
||||
* the actual field value itself?
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result || isStaticallyTyped()")
|
||||
public final boolean isDynamicallyTyped() { return staticType == null; }
|
||||
|
||||
/**
|
||||
* Get the BCF2 type for this field, either from the static type of the
|
||||
* field itself or by inspecting the value itself.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public final BCF2Type getType(final Object value) {
|
||||
return isDynamicallyTyped() ? getDynamicType(value) : getStaticType();
|
||||
}
|
||||
|
||||
@Requires("isStaticallyTyped()")
|
||||
@Ensures("result != null")
|
||||
public final BCF2Type getStaticType() {
|
||||
return staticType;
|
||||
}
|
||||
|
||||
@Requires("isDynamicallyTyped()")
|
||||
@Ensures("result != null")
|
||||
public BCF2Type getDynamicType(final Object value) {
|
||||
throw new ReviewedStingException("BUG: cannot get dynamic type for statically typed BCF2 field");
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// methods to encode values, including the key abstract method
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Convenience method that just calls encodeValue with no minimum for the number of values.
|
||||
*
|
||||
* Primarily useful for encoding site values
|
||||
*
|
||||
* @param encoder
|
||||
* @param value
|
||||
* @param type
|
||||
* @throws IOException
|
||||
*/
|
||||
@Requires({"encoder != null", "isDynamicallyTyped() || type == getStaticType()"})
|
||||
public void encodeOneValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
|
||||
encodeValue(encoder, value, type, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Key abstract method that should encode a value of the given type into the encoder.
|
||||
*
|
||||
* Value will be of a type appropriate to the underlying encoder. If the genotype field is represented as
|
||||
* an int[], this will be value, and the encoder needs to handle encoding all of the values in the int[].
|
||||
*
|
||||
* The type argument should be used, not the getType() method in the superclass, as an outer loop might have
|
||||
* decided a more general type (int16) to use, even though this encoder could have been done with int8.
|
||||
*
|
||||
* If minValues > 0, then encodeValue must write in at least minValues items from value. If value is atomic,
|
||||
* this means that minValues - 1 MISSING values should be added to the encoder. If value is a collection
|
||||
* type (int[]) then minValues - values.length should be added. This argument is intended to handle padding
|
||||
* of values in genotype fields.
|
||||
*
|
||||
* @param encoder
|
||||
* @param value
|
||||
* @param type
|
||||
* @param minValues
|
||||
* @throws IOException
|
||||
*/
|
||||
@Requires({"encoder != null", "isDynamicallyTyped() || type == getStaticType()", "minValues >= 0"})
|
||||
public abstract void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException;
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Subclass to encode Strings
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
public static class StringOrCharacter extends BCF2FieldEncoder {
|
||||
public StringOrCharacter(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict ) {
|
||||
super(headerLine, dict, BCF2Type.CHAR);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
|
||||
final String s = javaStringToBCF2String(value);
|
||||
encoder.encodeRawString(s, Math.max(s.length(), minValues));
|
||||
}
|
||||
|
||||
//
|
||||
// Regardless of what the header says, BCF2 strings and characters are always encoded
|
||||
// as arrays of CHAR type, which has a variable number of elements depending on the
|
||||
// exact string being encoded
|
||||
//
|
||||
@Override public boolean hasConstantNumElements() { return false; }
|
||||
@Override public boolean hasContextDeterminedNumElements() { return false; }
|
||||
@Override public boolean hasValueDeterminedNumElements() { return true; }
|
||||
@Override protected int numElementsFromValue(final Object value) {
|
||||
return value == null ? 0 : javaStringToBCF2String(value).length();
|
||||
}
|
||||
|
||||
/**
|
||||
* Recode the incoming object to a String, compacting it into a
|
||||
* BCF2 string if the value is a list.
|
||||
*
|
||||
* @param value a String or List<String> to encode, or null
|
||||
* @return a non-null string to encode
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
private String javaStringToBCF2String(final Object value) {
|
||||
if ( value == null )
|
||||
return "";
|
||||
else if (value instanceof List) {
|
||||
if ( ((List) value).size() == 1 )
|
||||
return (String)((List) value).get(0);
|
||||
else
|
||||
return BCF2Utils.collapseStringList((List<String>)value);
|
||||
} else
|
||||
return (String)value;
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Subclass to encode FLAG
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
public static class Flag extends BCF2FieldEncoder {
|
||||
public Flag(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict ) {
|
||||
super(headerLine, dict, BCF2Type.INT8);
|
||||
if ( ! headerLine.isFixedCount() || headerLine.getCount() != 0 )
|
||||
throw new ReviewedStingException("Flag encoder only suppports atomic flags!");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int numElements() {
|
||||
return 1; // the header says 0 but we will write 1 value
|
||||
}
|
||||
|
||||
@Override
|
||||
@Requires("minValues <= 1")
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
|
||||
encoder.encodeRawBytes(1, getStaticType());
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Subclass to encode FLOAT
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
public static class Float extends BCF2FieldEncoder {
|
||||
final boolean isAtomic;
|
||||
|
||||
public Float(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict ) {
|
||||
super(headerLine, dict, BCF2Type.FLOAT);
|
||||
isAtomic = hasConstantNumElements() && numElements() == 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
|
||||
int count = 0;
|
||||
// TODO -- can be restructured to avoid toList operation
|
||||
if ( isAtomic ) {
|
||||
// fast path for fields with 1 fixed float value
|
||||
if ( value != null ) {
|
||||
encoder.encodeRawFloat((Double)value);
|
||||
count++;
|
||||
}
|
||||
} else {
|
||||
// handle generic case
|
||||
final List<Double> doubles = toList(Double.class, value);
|
||||
for ( final double d : doubles ) {
|
||||
encoder.encodeRawFloat(d);
|
||||
count++;
|
||||
}
|
||||
}
|
||||
for ( ; count < minValues; count++ ) encoder.encodeRawMissingValue(type);
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Subclass to encode int[]
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
public static class IntArray extends BCF2FieldEncoder {
|
||||
public IntArray(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict ) {
|
||||
super(headerLine, dict, null);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int numElementsFromValue(final Object value) {
|
||||
return value == null ? 0 : ((int[])value).length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BCF2Type getDynamicType(final Object value) {
|
||||
return value == null ? BCF2Type.INT8 : BCF2Utils.determineIntegerType((int[])value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
|
||||
int count = 0;
|
||||
if ( value != null ) {
|
||||
for ( final int i : (int[])value ) {
|
||||
encoder.encodeRawInt(i, type);
|
||||
count++;
|
||||
}
|
||||
}
|
||||
for ( ; count < minValues; count++ ) encoder.encodeRawMissingValue(type);
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Subclass to encode List<Integer>
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Specialized int encoder for atomic (non-list) integers
|
||||
*/
|
||||
public static class AtomicInt extends BCF2FieldEncoder {
|
||||
public AtomicInt(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict ) {
|
||||
super(headerLine, dict, null);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BCF2Type getDynamicType(final Object value) {
|
||||
return value == null ? BCF2Type.INT8 : BCF2Utils.determineIntegerType((Integer)value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
|
||||
int count = 0;
|
||||
if ( value != null ) {
|
||||
encoder.encodeRawInt((Integer)value, type);
|
||||
count++;
|
||||
}
|
||||
for ( ; count < minValues; count++ ) encoder.encodeRawMissingValue(type);
|
||||
}
|
||||
}
|
||||
|
||||
public static class GenericInts extends BCF2FieldEncoder {
|
||||
public GenericInts(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict ) {
|
||||
super(headerLine, dict, null);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BCF2Type getDynamicType(final Object value) {
|
||||
return value == null ? BCF2Type.INT8 : BCF2Utils.determineIntegerType(toList(Integer.class, value));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
|
||||
int count = 0;
|
||||
for ( final int i : toList(Integer.class, value) ) {
|
||||
encoder.encodeRawInt(i, type);
|
||||
count++;
|
||||
}
|
||||
for ( ; count < minValues; count++ ) encoder.encodeRawMissingValue(type);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Helper methods
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Helper function that takes an object and returns a list representation
|
||||
* of it:
|
||||
*
|
||||
* o == null => []
|
||||
* o is a list => o
|
||||
* else => [o]
|
||||
*
|
||||
* @param o
|
||||
* @return
|
||||
*/
|
||||
private final static <T> List<T> toList(final Class<T> c, final Object o) {
|
||||
if ( o == null ) return Collections.emptyList();
|
||||
else if ( o instanceof List ) return (List<T>)o;
|
||||
else return Collections.singletonList((T)o);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,310 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.variantcontext.writer;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* See BCF2Writer for documentation on this class's role in encoding BCF2 files
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 06/12
|
||||
*/
|
||||
public abstract class BCF2FieldWriter {
|
||||
private final VCFHeader header;
|
||||
private final BCF2FieldEncoder fieldEncoder;
|
||||
|
||||
/**
 * Creates a writer bound to a VCF header and to the encoder for one field.
 *
 * @param header the VCF header, later available through getHeader()
 * @param fieldEncoder the encoder for this writer's field, later available through getFieldEncoder()
 */
protected BCF2FieldWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
    this.fieldEncoder = fieldEncoder;
    this.header = header;
}
|
||||
|
||||
protected VCFHeader getHeader() { return header; }
|
||||
protected BCF2FieldEncoder getFieldEncoder() {
|
||||
return fieldEncoder;
|
||||
}
|
||||
protected String getField() { return getFieldEncoder().getField(); }
|
||||
|
||||
public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
|
||||
fieldEncoder.writeFieldKey(encoder);
|
||||
}
|
||||
|
||||
public void done(final BCF2Encoder encoder, final VariantContext vc) throws IOException { } // TODO -- overload done so that we null out values and test for correctness
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "BCF2FieldWriter " + getClass().getSimpleName() + " with encoder " + getFieldEncoder();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Sites writers
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
public static abstract class SiteWriter extends BCF2FieldWriter {
|
||||
protected SiteWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
|
||||
super(header, fieldEncoder);
|
||||
}
|
||||
|
||||
public abstract void site(final BCF2Encoder encoder, final VariantContext vc) throws IOException;
|
||||
}
|
||||
|
||||
public static class GenericSiteWriter extends SiteWriter {
|
||||
public GenericSiteWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
|
||||
super(header, fieldEncoder);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void site(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
|
||||
final Object rawValue = vc.getAttribute(getField(), null);
|
||||
final BCF2Type type = getFieldEncoder().getType(rawValue);
|
||||
if ( rawValue == null ) {
|
||||
// the value is missing, just write in null
|
||||
encoder.encodeType(0, type);
|
||||
} else {
|
||||
final int valueCount = getFieldEncoder().numElements(vc, rawValue);
|
||||
encoder.encodeType(valueCount, type);
|
||||
getFieldEncoder().encodeOneValue(encoder, rawValue, type);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Genotypes writers
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
public static abstract class GenotypesWriter extends BCF2FieldWriter {
|
||||
int nValuesPerGenotype = -1;
|
||||
BCF2Type encodingType = null;
|
||||
|
||||
protected GenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
|
||||
super(header, fieldEncoder);
|
||||
|
||||
if ( fieldEncoder.hasConstantNumElements() ) {
|
||||
nValuesPerGenotype = getFieldEncoder().numElements();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
|
||||
// writes the key information
|
||||
super.start(encoder, vc);
|
||||
|
||||
// only update if we need to
|
||||
if ( ! getFieldEncoder().hasConstantNumElements() ) {
|
||||
if ( getFieldEncoder().hasContextDeterminedNumElements() )
|
||||
// we are cheap -- just depends on genotype of allele counts
|
||||
nValuesPerGenotype = getFieldEncoder().numElements(vc);
|
||||
else
|
||||
// we have to go fishing through the values themselves (expensive)
|
||||
nValuesPerGenotype = computeMaxSizeOfGenotypeFieldFromValues(vc);
|
||||
}
|
||||
|
||||
encoder.encodeType(nValuesPerGenotype, encodingType);
|
||||
}
|
||||
|
||||
public void addGenotype(final BCF2Encoder encoder, final VariantContext vc, final Genotype g) throws IOException {
|
||||
final Object fieldValue = g.getExtendedAttribute(getField(), null);
|
||||
getFieldEncoder().encodeValue(encoder, fieldValue, encodingType, nValuesPerGenotype);
|
||||
}
|
||||
|
||||
protected int numElements(final VariantContext vc, final Genotype g) {
|
||||
return getFieldEncoder().numElements(vc, g.getExtendedAttribute(getField()));
|
||||
}
|
||||
|
||||
private final int computeMaxSizeOfGenotypeFieldFromValues(final VariantContext vc) {
|
||||
int size = -1;
|
||||
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
size = Math.max(size, numElements(vc, g));
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
}
|
||||
|
||||
public static class StaticallyTypeGenotypesWriter extends GenotypesWriter {
|
||||
public StaticallyTypeGenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
|
||||
super(header, fieldEncoder);
|
||||
encodingType = getFieldEncoder().getStaticType();
|
||||
}
|
||||
}
|
||||
|
||||
public static class IntegerTypeGenotypesWriter extends GenotypesWriter {
|
||||
public IntegerTypeGenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
|
||||
super(header, fieldEncoder);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
|
||||
// the only value that is dynamic are integers
|
||||
final List<Integer> values = new ArrayList<Integer>(vc.getNSamples());
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
for ( final Object i : BCF2Utils.toList(g.getExtendedAttribute(getField(), null)) ) {
|
||||
values.add((Integer)i); // we know they are all integers
|
||||
}
|
||||
}
|
||||
|
||||
encodingType = BCF2Utils.determineIntegerType(values);
|
||||
super.start(encoder, vc);
|
||||
}
|
||||
}
|
||||
|
||||
public static class IGFGenotypesWriter extends GenotypesWriter {
|
||||
final IntGenotypeFieldAccessors.Accessor ige;
|
||||
|
||||
public IGFGenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder, final IntGenotypeFieldAccessors.Accessor ige) {
|
||||
super(header, fieldEncoder);
|
||||
this.ige = ige;
|
||||
|
||||
if ( ! (fieldEncoder instanceof BCF2FieldEncoder.IntArray) )
|
||||
throw new ReviewedStingException("BUG: IntGenotypesWriter requires IntArray encoder for field " + getField());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
|
||||
// TODO
|
||||
// TODO this piece of code consumes like 10% of the runtime alone because fo the vc.getGenotypes() iteration
|
||||
// TODO
|
||||
encodingType = BCF2Type.INT8;
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
final int[] pls = ige.getValues(g);
|
||||
final BCF2Type plsType = getFieldEncoder().getType(pls);
|
||||
encodingType = BCF2Utils.maxIntegerType(encodingType, plsType);
|
||||
if ( encodingType == BCF2Type.INT32 )
|
||||
break; // stop early
|
||||
}
|
||||
|
||||
super.start(encoder, vc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addGenotype(final BCF2Encoder encoder, final VariantContext vc, final Genotype g) throws IOException {
|
||||
getFieldEncoder().encodeValue(encoder, ige.getValues(g), encodingType, nValuesPerGenotype);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int numElements(final VariantContext vc, final Genotype g) {
|
||||
return ige.getSize(g);
|
||||
}
|
||||
}
|
||||
|
||||
public static class GTWriter extends GenotypesWriter {
|
||||
final Map<Allele, Integer> alleleMapForTriPlus = new HashMap<Allele, Integer>(5);
|
||||
Allele ref, alt1;
|
||||
|
||||
public GTWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
|
||||
super(header, fieldEncoder);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
|
||||
if ( vc.getNAlleles() > BCF2Utils.MAX_ALLELES_IN_GENOTYPES )
|
||||
throw new ReviewedStingException("Current BCF2 encoder cannot handle sites " +
|
||||
"with > " + BCF2Utils.MAX_ALLELES_IN_GENOTYPES + " alleles, but you have "
|
||||
+ vc.getNAlleles() + " at " + vc.getChr() + ":" + vc.getStart());
|
||||
|
||||
encodingType = BCF2Type.INT8;
|
||||
buildAlleleMap(vc);
|
||||
nValuesPerGenotype = vc.getMaxPloidy();
|
||||
|
||||
super.start(encoder, vc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addGenotype(final BCF2Encoder encoder, final VariantContext vc, final Genotype g) throws IOException {
|
||||
final int samplePloidy = g.getPloidy();
|
||||
for ( int i = 0; i < nValuesPerGenotype; i++ ) {
|
||||
if ( i < samplePloidy ) {
|
||||
// we encode the actual allele
|
||||
final Allele a = g.getAllele(i);
|
||||
final int offset = getAlleleOffset(a);
|
||||
final int encoded = ((offset+1) << 1) | (g.isPhased() ? 0x01 : 0x00);
|
||||
encoder.encodeRawBytes(encoded, encodingType);
|
||||
} else {
|
||||
// we need to pad with missing as we have ploidy < max for this sample
|
||||
encoder.encodeRawBytes(encodingType.getMissingBytes(), encodingType);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fast path code to determine the offset.
|
||||
*
|
||||
* Inline tests for == against ref (most common, first test)
|
||||
* == alt1 (second most common, second test)
|
||||
* == NO_CALL (third)
|
||||
* and finally in the map from allele => offset for all alt 2+ alleles
|
||||
*
|
||||
* @param a the allele whose offset we wish to determine
|
||||
* @return the offset (from 0) of the allele in the list of variant context alleles (-1 means NO_CALL)
|
||||
*/
|
||||
@Requires("a != null")
|
||||
private final int getAlleleOffset(final Allele a) {
|
||||
if ( a == ref ) return 0;
|
||||
else if ( a == alt1 ) return 1;
|
||||
else if ( a == Allele.NO_CALL ) return -1;
|
||||
else {
|
||||
final Integer o = alleleMapForTriPlus.get(a);
|
||||
if ( o == null ) throw new ReviewedStingException("BUG: Couldn't find allele offset for allele " + a);
|
||||
return o;
|
||||
}
|
||||
}
|
||||
|
||||
private final void buildAlleleMap(final VariantContext vc) {
|
||||
// these are fast path options to determine the offsets for
|
||||
final int nAlleles = vc.getNAlleles();
|
||||
ref = vc.getReference();
|
||||
alt1 = nAlleles > 1 ? vc.getAlternateAllele(0) : null;
|
||||
|
||||
if ( nAlleles > 2 ) {
|
||||
// for multi-allelics we need to clear the map, and add additional looks
|
||||
alleleMapForTriPlus.clear();
|
||||
alleleMapForTriPlus.put(Allele.NO_CALL, -1); // convenience for lookup
|
||||
final List<Allele> alleles = vc.getAlleles();
|
||||
for ( int i = 2; i < alleles.size(); i++ ) {
|
||||
alleleMapForTriPlus.put(alleles.get(i), i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,183 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.variantcontext.writer;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* See #BCFWriter for documentation on this classes role in encoding BCF2 files
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 06/12
|
||||
*/
|
||||
public class BCF2FieldWriterManager {
|
||||
final protected static Logger logger = Logger.getLogger(BCF2FieldWriterManager.class);
|
||||
final Map<String, BCF2FieldWriter.SiteWriter> siteWriters = new HashMap<String, BCF2FieldWriter.SiteWriter>();
|
||||
final Map<String, BCF2FieldWriter.GenotypesWriter> genotypesWriters = new HashMap<String, BCF2FieldWriter.GenotypesWriter>();
|
||||
final IntGenotypeFieldAccessors intGenotypeFieldAccessors = new IntGenotypeFieldAccessors();
|
||||
|
||||
public BCF2FieldWriterManager() { }
|
||||
|
||||
/**
|
||||
* Setup the FieldWriters appropriate to each INFO and FORMAT in the VCF header
|
||||
*
|
||||
* Must be called before any of the getter methods will work
|
||||
*
|
||||
* @param header a VCFHeader containing description for every INFO and FORMAT field we'll attempt to write out to BCF
|
||||
* @param encoder the encoder we are going to use to write out the BCF2 data
|
||||
* @param stringDictionary a map from VCFHeader strings to their offsets for encoding
|
||||
*/
|
||||
public void setup(final VCFHeader header, final BCF2Encoder encoder, final Map<String, Integer> stringDictionary) {
|
||||
for (final VCFInfoHeaderLine line : header.getInfoHeaderLines()) {
|
||||
final String field = line.getID();
|
||||
final BCF2FieldWriter.SiteWriter writer = createInfoWriter(header, line, encoder, stringDictionary);
|
||||
add(siteWriters, field, writer);
|
||||
}
|
||||
|
||||
for (final VCFFormatHeaderLine line : header.getFormatHeaderLines()) {
|
||||
final String field = line.getID();
|
||||
final BCF2FieldWriter.GenotypesWriter writer = createGenotypesWriter(header, line, encoder, stringDictionary);
|
||||
add(genotypesWriters, field, writer);
|
||||
}
|
||||
}
|
||||
|
||||
@Requires({"field != null", "writer != null"})
|
||||
@Ensures("map.containsKey(field)")
|
||||
private final <T> void add(final Map<String, T> map, final String field, final T writer) {
|
||||
if ( map.containsKey(field) )
|
||||
throw new ReviewedStingException("BUG: field " + field + " already seen in VCFHeader while building BCF2 field encoders");
|
||||
map.put(field, writer);
|
||||
logger.info(writer);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
//
|
||||
// Master routine to look at the header, a specific line, and
|
||||
// build an appropriate SiteWriter for that header element
|
||||
//
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
private BCF2FieldWriter.SiteWriter createInfoWriter(final VCFHeader header,
|
||||
final VCFInfoHeaderLine line,
|
||||
final BCF2Encoder encoder,
|
||||
final Map<String, Integer> dict) {
|
||||
return new BCF2FieldWriter.GenericSiteWriter(header, createFieldEncoder(line, encoder, dict, false));
|
||||
}
|
||||
|
||||
private BCF2FieldEncoder createFieldEncoder(final VCFCompoundHeaderLine line,
|
||||
final BCF2Encoder encoder,
|
||||
final Map<String, Integer> dict,
|
||||
final boolean createGenotypesEncoders ) {
|
||||
|
||||
if ( createGenotypesEncoders && intGenotypeFieldAccessors.getAccessor(line.getID()) != null ) {
|
||||
if ( line.getType() != VCFHeaderLineType.Integer )
|
||||
logger.warn("Warning: field " + line.getID() + " expected to encode an integer but saw " + line.getType() + " for record " + line);
|
||||
return new BCF2FieldEncoder.IntArray(line, dict);
|
||||
} else if ( createGenotypesEncoders && line.getID().equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||
return new BCF2FieldEncoder.GenericInts(line, dict);
|
||||
} else {
|
||||
switch ( line.getType() ) {
|
||||
case Character:
|
||||
case String:
|
||||
return new BCF2FieldEncoder.StringOrCharacter(line, dict);
|
||||
case Flag:
|
||||
return new BCF2FieldEncoder.Flag(line, dict);
|
||||
case Float:
|
||||
return new BCF2FieldEncoder.Float(line, dict);
|
||||
case Integer:
|
||||
if ( line.isFixedCount() && line.getCount() == 1 )
|
||||
return new BCF2FieldEncoder.AtomicInt(line, dict);
|
||||
else
|
||||
return new BCF2FieldEncoder.GenericInts(line, dict);
|
||||
default:
|
||||
throw new ReviewedStingException("Unexpected type for field " + line.getID());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
//
|
||||
// Master routine to look at the header, a specific line, and
|
||||
// build an appropriate Genotypes for that header element
|
||||
//
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
private BCF2FieldWriter.GenotypesWriter createGenotypesWriter(final VCFHeader header,
|
||||
final VCFFormatHeaderLine line,
|
||||
final BCF2Encoder encoder,
|
||||
final Map<String, Integer> dict) {
|
||||
final String field = line.getID();
|
||||
final BCF2FieldEncoder fieldEncoder = createFieldEncoder(line, encoder, dict, true);
|
||||
|
||||
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||
return new BCF2FieldWriter.GTWriter(header, fieldEncoder);
|
||||
} else if ( intGenotypeFieldAccessors.getAccessor(field) != null ) {
|
||||
return new BCF2FieldWriter.IGFGenotypesWriter(header, fieldEncoder, intGenotypeFieldAccessors.getAccessor(field));
|
||||
} else if ( line.getType() == VCFHeaderLineType.Integer ) {
|
||||
return new BCF2FieldWriter.IntegerTypeGenotypesWriter(header, fieldEncoder);
|
||||
} else {
|
||||
return new BCF2FieldWriter.StaticallyTypeGenotypesWriter(header, fieldEncoder);
|
||||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
//
|
||||
// Accessors to get site / genotype writers
|
||||
//
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Get a site writer specialized to encode values for site info field
|
||||
* @param field key found in the VCF header INFO records
|
||||
* @return
|
||||
*/
|
||||
public BCF2FieldWriter.SiteWriter getSiteFieldWriter(final String field) {
|
||||
return getWriter(field, siteWriters);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a genotypes writer specialized to encode values for genotypes field
|
||||
* @param field key found in the VCF header FORMAT records
|
||||
* @return
|
||||
*/
|
||||
public BCF2FieldWriter.GenotypesWriter getGenotypeFieldWriter(final String field) {
|
||||
return getWriter(field, genotypesWriters);
|
||||
}
|
||||
|
||||
@Requires({"map != null", "key != null"})
|
||||
@Ensures("result != null")
|
||||
public <T> T getWriter(final String key, final Map<String, T> map) {
|
||||
final T writer = map.get(key);
|
||||
if ( writer == null ) throw new ReviewedStingException("BUG: no writer found for " + key);
|
||||
return writer;
|
||||
}
|
||||
}
|
||||
|
|
@ -24,9 +24,11 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.variantcontext.writer;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Encoder;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
|
|
@ -37,6 +39,49 @@ import org.broadinstitute.sting.utils.variantcontext.*;
|
|||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* VariantContextWriter that emits BCF2 binary encoding
|
||||
*
|
||||
* Overall structure of this writer is complex for efficiency reasons
|
||||
*
|
||||
* -- The BCF2Writer manages the low-level BCF2 encoder, the mappings
|
||||
* from contigs and strings to offsets, the VCF header, and holds the
|
||||
* lower-level encoders that map from VC and Genotype fields to their
|
||||
* specific encoders. This class also writes out the standard BCF2 fields
|
||||
* like POS, contig, the size of info and genotype data, QUAL, etc. It
|
||||
* has loops over the INFO and GENOTYPES to encode each individual datum
|
||||
* with the generic field encoders, but the actual encoding work is
|
||||
* done by the FieldWriter classes themselves
|
||||
*
|
||||
* -- BCF2FieldWriter are specialized classes for writing out SITE and
|
||||
* genotype information for specific SITE/GENOTYPE fields (like AC for
|
||||
* sites and GQ for genotypes). These are objects in themselves because
|
||||
* they manage all of the complexity of relating the types in the VCF header
|
||||
* with the proper encoding in BCF as well as the type representing this
|
||||
* in java. Relating all three of these pieces of information together
|
||||
* is the main complexity challenge in the encoder. The piece of code
|
||||
* that determines which FieldWriters to associate with each SITE and
|
||||
* GENOTYPE field is the BCF2FieldWriterManager. These FieldWriters
|
||||
* are specialized for specific combinations of encoders (see below)
|
||||
* and contexts (genotypes) for efficiency, so they smartly manage
|
||||
* the writing of PLs (encoded as int[]) directly into the lowest
|
||||
* level BCFEncoder.
|
||||
*
|
||||
* -- At the third level is the BCF2FieldEncoder, relatively simple
|
||||
* pieces of code that handle the task of determining the right
|
||||
* BCF2 type for specific field values, as well as reporting back
|
||||
* information such as the number of elements used to encode it
|
||||
* (simple for atomic values like Integer but complex for PLs
|
||||
* or lists of strings)
|
||||
*
|
||||
* -- At the lowest level is the BCF2Encoder itself. This provides
|
||||
* just the limited encoding methods specified by the BCF2 specification. This encoder
|
||||
* doesn't do anything but make it possible to conveniently write out valid low-level
|
||||
* BCF2 constructs.
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 06/12
|
||||
*/
|
||||
class BCF2Writer extends IndexingVariantContextWriter {
|
||||
final protected static Logger logger = Logger.getLogger(BCF2Writer.class);
|
||||
|
||||
|
|
@ -45,8 +90,10 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
private final Map<String, Integer> contigDictionary = new HashMap<String, Integer>();
|
||||
private final Map<String, Integer> stringDictionaryMap = new LinkedHashMap<String, Integer>();
|
||||
private final boolean doNotWriteGenotypes;
|
||||
private String[] sampleNames = null;
|
||||
|
||||
private final BCF2Encoder encoder = new BCF2Encoder(); // initialized after the header arrives
|
||||
final BCF2FieldWriterManager fieldManager = new BCF2FieldWriterManager();
|
||||
|
||||
public BCF2Writer(final File location, final OutputStream output, final SAMSequenceDictionary refDict, final boolean enableOnTheFlyIndexing, final boolean doNotWriteGenotypes) {
|
||||
super(writerName(location, output), location, output, refDict, enableOnTheFlyIndexing);
|
||||
|
|
@ -60,11 +107,6 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private final void createContigDictionary(final Collection<VCFContigHeaderLine> contigLines) {
|
||||
for ( final VCFContigHeaderLine contig : contigLines )
|
||||
contigDictionary.put(contig.getID(), contig.getContigIndex());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeHeader(final VCFHeader header) {
|
||||
// create the config offsets map
|
||||
|
|
@ -81,6 +123,11 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
stringDictionaryMap.put(dict.get(i), i);
|
||||
}
|
||||
|
||||
sampleNames = header.getGenotypeSamples().toArray(new String[header.getNGenotypeSamples()]);
|
||||
|
||||
// setup the field encodings
|
||||
fieldManager.setup(header, encoder, stringDictionaryMap);
|
||||
|
||||
try {
|
||||
// write out the header into a byte stream, get its length, and write everything to the file
|
||||
final ByteArrayOutputStream capture = new ByteArrayOutputStream();
|
||||
|
|
@ -91,7 +138,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
|
||||
final byte[] headerBytes = capture.toByteArray();
|
||||
outputStream.write(BCF2Utils.MAGIC_HEADER_LINE);
|
||||
BCF2Encoder.encodePrimitive(headerBytes.length, BCF2Type.INT32, outputStream);
|
||||
BCF2Utils.encodeRawBytes(headerBytes.length, BCF2Type.INT32, outputStream);
|
||||
outputStream.write(headerBytes);
|
||||
} catch (IOException e) {
|
||||
throw new UserException.CouldNotCreateOutputFile("BCF2 stream", "Got IOException while trying to write BCF2 header", e);
|
||||
|
|
@ -99,8 +146,11 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void add( final VariantContext initialVC ) {
|
||||
final VariantContext vc = initialVC.fullyDecode(header);
|
||||
public void add( VariantContext vc ) {
|
||||
if ( doNotWriteGenotypes )
|
||||
vc = new VariantContextBuilder(vc).noGenotypes().make();
|
||||
vc = vc.fullyDecode(header);
|
||||
|
||||
super.add(vc); // allow on the fly indexing
|
||||
|
||||
try {
|
||||
|
|
@ -162,11 +212,11 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
// info fields
|
||||
final int nAlleles = vc.getNAlleles();
|
||||
final int nInfo = vc.getAttributes().size();
|
||||
final int nGenotypeFormatFields = VCFWriter.calcVCFGenotypeKeys(vc, header).size();
|
||||
final int nGenotypeFormatFields = getNGenotypeFormatFields(vc);
|
||||
final int nSamples = vc.getNSamples();
|
||||
|
||||
encoder.encodeRawInt((nAlleles << 16) | (nInfo & 0x00FF), BCF2Type.INT32);
|
||||
encoder.encodeRawInt((nGenotypeFormatFields << 24) | (nSamples & 0x0FFF), BCF2Type.INT32);
|
||||
encoder.encodeRawInt((nAlleles << 16) | (nInfo & 0x0000FFFF), BCF2Type.INT32);
|
||||
encoder.encodeRawInt((nGenotypeFormatFields << 24) | (nSamples & 0x00FFFFF), BCF2Type.INT32);
|
||||
|
||||
buildID(vc);
|
||||
buildAlleles(vc);
|
||||
|
|
@ -176,15 +226,41 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
return encoder.getRecordBytes();
|
||||
}
|
||||
|
||||
private BCF2Codec.LazyData getLazyData(final VariantContext vc) {
|
||||
if ( vc.getGenotypes().isLazyWithData() ) {
|
||||
LazyGenotypesContext lgc = (LazyGenotypesContext)vc.getGenotypes();
|
||||
if ( lgc.getUnparsedGenotypeData() instanceof BCF2Codec.LazyData )
|
||||
return (BCF2Codec.LazyData)lgc.getUnparsedGenotypeData();
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Try to get the nGenotypeFields as efficiently as possible.
|
||||
*
|
||||
* If this is a lazy BCF2 object just grab the field count from there,
|
||||
* otherwise do the whole counting by types test in the actual data
|
||||
*
|
||||
* @param vc
|
||||
* @return
|
||||
*/
|
||||
private final int getNGenotypeFormatFields(final VariantContext vc) {
|
||||
final BCF2Codec.LazyData lazyData = getLazyData(vc);
|
||||
return lazyData != null ? lazyData.nGenotypeFields : VCFWriter.calcVCFGenotypeKeys(vc, header).size();
|
||||
}
|
||||
|
||||
private void buildID( VariantContext vc ) throws IOException {
|
||||
encoder.encodeTyped(vc.getID(), BCF2Type.CHAR);
|
||||
encoder.encodeTypedString(vc.getID());
|
||||
}
|
||||
|
||||
private void buildAlleles( VariantContext vc ) throws IOException {
|
||||
final boolean needsPadding = VariantContextUtils.needsPadding(vc);
|
||||
for ( final Allele allele : vc.getAlleles() ) {
|
||||
final String s = needsPadding ? VariantContextUtils.padAllele(vc,allele) : allele.getDisplayString();
|
||||
encoder.encodeTyped(s, BCF2Type.CHAR);
|
||||
byte[] s = allele.getBases();
|
||||
if ( needsPadding )
|
||||
s = VariantContextUtils.padAllele(vc,allele).getBytes();
|
||||
encoder.encodeTypedString(s);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -199,233 +275,43 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
private void buildInfo( VariantContext vc ) throws IOException {
|
||||
for ( Map.Entry<String, Object> infoFieldEntry : vc.getAttributes().entrySet() ) {
|
||||
final String key = infoFieldEntry.getKey();
|
||||
final VCFToBCFEncoding encoding = prepFieldValueForEncoding(key, infoFieldEntry.getValue());
|
||||
|
||||
encodeStringByRef(key);
|
||||
encoder.encodeTyped(encoding.valuesToEncode, encoding.BCF2Type);
|
||||
final BCF2FieldWriter.SiteWriter writer = fieldManager.getSiteFieldWriter(key);
|
||||
writer.start(encoder, vc);
|
||||
writer.site(encoder, vc);
|
||||
writer.done(encoder, vc);
|
||||
}
|
||||
}
|
||||
|
||||
private byte[] buildSamplesData(final VariantContext vc) throws IOException {
|
||||
List<String> genotypeFields = VCFWriter.calcVCFGenotypeKeys(vc, header);
|
||||
for ( final String field : genotypeFields ) {
|
||||
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||
addGenotypes(vc);
|
||||
} else if ( field.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
|
||||
addGQ(vc);
|
||||
} else if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
|
||||
addGenotypeFilters(vc);
|
||||
} else if ( field.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) ) {
|
||||
addPLs(vc);
|
||||
} else {
|
||||
addGenericGenotypeField(vc, field);
|
||||
}
|
||||
}
|
||||
|
||||
return encoder.getRecordBytes();
|
||||
}
|
||||
|
||||
private final int getNGenotypeFieldValues(final String field, final VariantContext vc) {
|
||||
final VCFCompoundHeaderLine metaData = VariantContextUtils.getMetaDataForField(header, field);
|
||||
assert metaData != null; // field is supposed to be in header
|
||||
|
||||
int nFields = metaData.getCount(vc.getNAlleles() - 1);
|
||||
if ( nFields == -1 ) { // unbounded, need to look at values
|
||||
return computeMaxSizeOfGenotypeFieldFromValues(field, vc);
|
||||
final BCF2Codec.LazyData lazyData = getLazyData(vc);
|
||||
if ( lazyData != null ) {
|
||||
// we never decoded any data from this BCF file, so just pass it back
|
||||
return lazyData.bytes;
|
||||
} else {
|
||||
return nFields;
|
||||
}
|
||||
}
|
||||
// we have to do work to convert the VC into a BCF2 byte stream
|
||||
final List<String> genotypeFields = VCFWriter.calcVCFGenotypeKeys(vc, header);
|
||||
for ( final String field : genotypeFields ) {
|
||||
final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field);
|
||||
|
||||
private final int computeMaxSizeOfGenotypeFieldFromValues(final String field, final VariantContext vc) {
|
||||
int size = -1;
|
||||
final GenotypesContext gc = vc.getGenotypes();
|
||||
|
||||
for ( final Genotype g : gc ) {
|
||||
final Object o = g.getAttribute(field);
|
||||
if ( o == null ) continue;
|
||||
if ( o instanceof List ) {
|
||||
// only do compute if first value is of type list
|
||||
size = Math.max(size, ((List)o).size());
|
||||
} else if ( size == -1 )
|
||||
size = 1;
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
private final void addGenericGenotypeField(final VariantContext vc, final String field) throws IOException {
|
||||
final int numInFormatField = getNGenotypeFieldValues(field, vc);
|
||||
final VCFToBCFEncoding encoding = prepFieldValueForEncoding(field, null);
|
||||
|
||||
startGenotypeField(field, numInFormatField, encoding.BCF2Type);
|
||||
for ( final String name : header.getGenotypeSamples() ) {
|
||||
final Genotype g = vc.getGenotype(name); // todo -- can we optimize this?
|
||||
try {
|
||||
final Object fieldValue = g.getAttribute(field);
|
||||
|
||||
if ( numInFormatField == 1 ) {
|
||||
// we encode the actual allele, encodeRawValue handles the missing case where fieldValue == null
|
||||
encoder.encodeRawValue(fieldValue, encoding.BCF2Type);
|
||||
} else {
|
||||
// multiple values, need to handle general case
|
||||
final List<Object> asList = toList(fieldValue);
|
||||
final int nSampleValues = asList.size();
|
||||
for ( int i = 0; i < numInFormatField; i++ ) {
|
||||
encoder.encodeRawValue(i < nSampleValues ? asList.get(i) : null, encoding.BCF2Type);
|
||||
}
|
||||
writer.start(encoder, vc);
|
||||
for ( final String name : sampleNames ) {
|
||||
Genotype g = vc.getGenotype(name);
|
||||
if ( g == null )
|
||||
// we don't have any data about g at all
|
||||
g = new GenotypeBuilder(name).make();
|
||||
writer.addGenotype(encoder, vc, g);
|
||||
}
|
||||
} catch ( ClassCastException e ) {
|
||||
throw new ReviewedStingException("Value stored in VariantContext incompatible with VCF header type for field " + field, e);
|
||||
writer.done(encoder, vc);
|
||||
}
|
||||
return encoder.getRecordBytes();
|
||||
}
|
||||
}
|
||||
|
||||
private final static List<Object> toList(final Object o) {
|
||||
if ( o == null ) return Collections.emptyList();
|
||||
else if ( o instanceof List ) return (List<Object>)o;
|
||||
else return Collections.singletonList(o);
|
||||
}
|
||||
|
||||
private final class VCFToBCFEncoding {
|
||||
VCFHeaderLineType vcfType;
|
||||
BCF2Type BCF2Type;
|
||||
List<? extends Object> valuesToEncode;
|
||||
|
||||
private VCFToBCFEncoding(final VCFHeaderLineType vcfType, final BCF2Type BCF2Type, final List<? extends Object> valuesToEncode) {
|
||||
this.vcfType = vcfType;
|
||||
this.BCF2Type = BCF2Type;
|
||||
this.valuesToEncode = valuesToEncode;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO -- we really need explicit converters as first class objects
|
||||
// TODO -- need to generalize so we can enable vectors of compressed genotype ints
|
||||
// TODO -- no sense in allocating these over and over
|
||||
private final VCFToBCFEncoding prepFieldValueForEncoding(final String field, final Object value) {
|
||||
final VCFCompoundHeaderLine metaData = VariantContextUtils.getMetaDataForField(header, field);
|
||||
final boolean isList = value instanceof List;
|
||||
final Object toType = isList ? ((List)value).get(0) : value;
|
||||
|
||||
try {
|
||||
switch ( metaData.getType() ) {
|
||||
case Character:
|
||||
assert toType instanceof String;
|
||||
return new VCFToBCFEncoding(metaData.getType(), BCF2Type.CHAR, Collections.singletonList(value));
|
||||
case Flag:
|
||||
return new VCFToBCFEncoding(metaData.getType(), BCF2Type.INT8, Collections.singletonList(1));
|
||||
case String:
|
||||
final List<String> s = isList ? (List<String>)value : Collections.singletonList((String) value);
|
||||
return new VCFToBCFEncoding(metaData.getType(), BCF2Type.CHAR, s);
|
||||
case Integer: // note integer calculation is a bit complex because of the need to determine sizes
|
||||
List<Integer> l;
|
||||
BCF2Type intType;
|
||||
if ( isList ) {
|
||||
l = (List<Integer>)value;
|
||||
intType = encoder.determineIntegerType(l);
|
||||
} else if ( value != null ) {
|
||||
intType = encoder.determineIntegerType((Integer)value);
|
||||
l = Collections.singletonList((Integer)value);
|
||||
} else {
|
||||
intType = BCF2Type.INT8;
|
||||
l = Collections.singletonList((Integer) null);
|
||||
}
|
||||
return new VCFToBCFEncoding(metaData.getType(), intType, l);
|
||||
case Float:
|
||||
return new VCFToBCFEncoding(metaData.getType(), BCF2Type.FLOAT, isList ? (List<Double>)value : Collections.singletonList(value));
|
||||
default:
|
||||
throw new ReviewedStingException("Unexpected type for field " + field);
|
||||
}
|
||||
} catch ( ClassCastException e ) {
|
||||
throw new ReviewedStingException("Error computing VCF -> BCF encoding. Received cast class exception"
|
||||
+ " indicating that the VCF header for " + metaData + " is inconsistent with the" +
|
||||
" value seen in the VariantContext object = " + value, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Placeholder for writing the genotype filter field: it only logs a warning
 * and encodes nothing. The commented-out lines are the unfinished
 * implementation, kept for reference (see the TODO about string sizing).
 *
 * @param vc the variant context being written (currently unused)
 * @throws IOException declared for symmetry with the other field writers; not thrown by the current body
 */
private final void addGenotypeFilters(final VariantContext vc) throws IOException {
|
||||
logger.warn("Skipping genotype filter field");
|
||||
// // TODO -- FIXME -- string is wrong here -- need to compute string size...
|
||||
// startGenotypeField(VCFConstants.GENOTYPE_FILTER_KEY, 1, BCFType.CHAR);
|
||||
// for ( final Genotype g : vc.getGenotypes() ) {
|
||||
// if ( g.filtersWereApplied() && g.isFiltered() ) {
|
||||
// encoder.encodeString(ParsingUtils.join(";", ParsingUtils.sortList(g.getFilters())));
|
||||
// } else {
|
||||
// encoder.encodeRawMissingValues(1, BCFType.CHAR); // todo fixme
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
||||
private final void addGQ(final VariantContext vc) throws IOException {
|
||||
startGenotypeField(VCFConstants.GENOTYPE_QUALITY_KEY, 1, BCF2Type.INT8);
|
||||
for ( final String name : header.getGenotypeSamples() ) {
|
||||
final Genotype g = vc.getGenotype(name); // todo -- can we optimize this?
|
||||
if ( g.hasLog10PError() ) {
|
||||
final int GQ = Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL);
|
||||
if ( GQ > VCFConstants.MAX_GENOTYPE_QUAL ) throw new ReviewedStingException("Unexpectedly large GQ " + GQ + " at " + vc);
|
||||
encoder.encodeRawValue(GQ, BCF2Type.INT8);
|
||||
} else {
|
||||
encoder.encodeRawMissingValues(1, BCF2Type.INT8);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Horrible special case to deal with the GenotypeLikelihoods class
|
||||
* @param vc
|
||||
* @throws IOException
|
||||
*/
|
||||
private final void addPLs(final VariantContext vc) throws IOException {
|
||||
final String field = VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY;
|
||||
final int numPLs = getNGenotypeFieldValues(field, vc);
|
||||
final int[] allPLs = new int[numPLs * vc.getNSamples()];
|
||||
|
||||
// collect all of the PLs into a single vector of values
|
||||
int i = 0;
|
||||
for ( final String name : header.getGenotypeSamples() ) {
|
||||
final Genotype g = vc.getGenotype(name); // todo -- can we optimize this?
|
||||
final GenotypeLikelihoods gls = g.getLikelihoods();
|
||||
final int[] pls = gls != null ? g.getLikelihoods().getAsPLs() : null;
|
||||
if ( pls == null )
|
||||
for ( int j = 0; j < numPLs; j++) allPLs[i++] = -1;
|
||||
else
|
||||
for ( int pl : pls ) allPLs[i++] = pl;
|
||||
}
|
||||
|
||||
// determine the best size
|
||||
final BCF2Type type = encoder.determineIntegerType(allPLs);
|
||||
startGenotypeField(field, numPLs, type);
|
||||
for ( int pl : allPLs )
|
||||
encoder.encodePrimitive(pl == -1 ? type.getMissingBytes() : pl, type);
|
||||
}
|
||||
|
||||
private final void addGenotypes(final VariantContext vc) throws IOException {
|
||||
if ( vc.getNAlleles() > BCF2Utils.MAX_ALLELES_IN_GENOTYPES )
|
||||
throw new ReviewedStingException("Current BCF2 encoder cannot handle sites " +
|
||||
"with > " + BCF2Utils.MAX_ALLELES_IN_GENOTYPES + " alleles, but you have "
|
||||
+ vc.getNAlleles() + " at " + vc.getChr() + ":" + vc.getStart());
|
||||
|
||||
final Map<Allele, String> alleleMap = VCFWriter.buildAlleleMap(vc);
|
||||
final int maxPloidy = vc.getMaxPloidy();
|
||||
startGenotypeField(VCFConstants.GENOTYPE_KEY, maxPloidy, BCF2Type.INT8);
|
||||
for ( final String name : header.getGenotypeSamples() ) {
|
||||
final Genotype g = vc.getGenotype(name); // todo -- can we optimize this?
|
||||
final List<Allele> alleles = g.getAlleles();
|
||||
final int samplePloidy = alleles.size();
|
||||
for ( int i = 0; i < maxPloidy; i++ ) {
|
||||
if ( i < samplePloidy ) {
|
||||
// we encode the actual allele
|
||||
final Allele a = alleles.get(i);
|
||||
final int offset = a.isNoCall() ? -1 : Integer.valueOf(alleleMap.get(a));
|
||||
final int encoded = ((offset+1) << 1) | (g.isPhased() ? 0x01 : 0x00);
|
||||
encoder.encodePrimitive(encoded, BCF2Type.INT8);
|
||||
} else {
|
||||
// we need to pad with missing as we have ploidy < max for this sample
|
||||
encoder.encodePrimitive(BCF2Type.INT8.getMissingBytes(), BCF2Type.INT8);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Low-level block encoding
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Write the data in the encoder to the outputstream as a length encoded
|
||||
|
|
@ -434,29 +320,18 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
@Requires({"infoBlock.length > 0", "genotypesBlock.length >= 0"})
|
||||
private void writeBlock(final byte[] infoBlock, final byte[] genotypesBlock) throws IOException {
|
||||
assert infoBlock.length > 0;
|
||||
assert genotypesBlock.length >= 0;
|
||||
|
||||
BCF2Encoder.encodePrimitive(infoBlock.length, BCF2Type.INT32, outputStream);
|
||||
BCF2Encoder.encodePrimitive(genotypesBlock.length, BCF2Type.INT32, outputStream);
|
||||
BCF2Utils.encodeRawBytes(infoBlock.length, BCF2Type.INT32, outputStream);
|
||||
BCF2Utils.encodeRawBytes(genotypesBlock.length, BCF2Type.INT32, outputStream);
|
||||
outputStream.write(infoBlock);
|
||||
outputStream.write(genotypesBlock);
|
||||
}
|
||||
|
||||
// TODO -- obvious optimization case
|
||||
private final BCF2Type encodeStringByRef(final String string) throws IOException {
|
||||
assert string != null;
|
||||
|
||||
return encodeStringsByRef(Collections.singletonList(string));
|
||||
}
|
||||
|
||||
// TODO -- in size == 1 case branch to singleoton fast-path
|
||||
@Requires("! strings.isEmpty()")
|
||||
@Ensures("BCF2Type.INTEGERS.contains(result)")
|
||||
private final BCF2Type encodeStringsByRef(final Collection<String> strings) throws IOException {
|
||||
assert ! strings.isEmpty();
|
||||
|
||||
final List<Integer> offsets = new ArrayList<Integer>(strings.size());
|
||||
BCF2Type maxType = BCF2Type.INT8; // start with the smallest size
|
||||
|
||||
// iterate over strings until we find one that needs 16 bits, and break
|
||||
for ( final String string : strings ) {
|
||||
|
|
@ -464,28 +339,22 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
if ( got == null ) throw new ReviewedStingException("Format error: could not find string " + string + " in header as required by BCF");
|
||||
final int offset = got;
|
||||
offsets.add(offset);
|
||||
|
||||
if ( maxType != BCF2Type.INT32) { // don't bother looking if we already are at 32 bit ints
|
||||
final BCF2Type type1 = encoder.determineIntegerType(offset);
|
||||
switch ( type1 ) {
|
||||
case INT8: break;
|
||||
case INT16: if ( maxType == BCF2Type.INT8 ) maxType = BCF2Type.INT16; break;
|
||||
case INT32: maxType = BCF2Type.INT32; break;
|
||||
default: throw new ReviewedStingException("Unexpected type " + type1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// we've checked the types for all strings, so write them out
|
||||
encoder.encodeTyped(offsets, maxType);
|
||||
return maxType;
|
||||
final BCF2Type type = BCF2Utils.determineIntegerType(offsets);
|
||||
encoder.encodeTyped(offsets, type);
|
||||
return type;
|
||||
}
|
||||
|
||||
private final void startGenotypeField(final String key, final int size, final BCF2Type valueType) throws IOException {
|
||||
assert key != null && ! key.equals("");
|
||||
assert size >= 0;
|
||||
|
||||
encodeStringByRef(key);
|
||||
encoder.encodeType(size, valueType);
|
||||
/**
|
||||
* Create the contigDictionary from the contigLines extracted from the VCF header
|
||||
*
|
||||
* @param contigLines
|
||||
*/
|
||||
@Requires("contigDictionary.isEmpty()")
|
||||
private final void createContigDictionary(final Collection<VCFContigHeaderLine> contigLines) {
|
||||
int offset = 0;
|
||||
for ( VCFContigHeaderLine contig : contigLines )
|
||||
contigDictionary.put(contig.getID(), offset++);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -159,4 +159,10 @@ final class PositionalOutputStream extends OutputStream {
|
|||
}
|
||||
|
||||
public final long getPosition() { return position; }
|
||||
|
||||
/**
 * Closes this stream by calling the superclass close() and then close() on out.
 *
 * @throws IOException if either close call fails
 */
@Override
|
||||
public void close() throws IOException {
|
||||
super.close();
|
||||
out.close();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,96 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.variantcontext.writer;
|
||||
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
/**
|
||||
* A convenient way to provide a single view on the many int and int[] field values we work with,
|
||||
* for writing out the values. This class makes writing out the inline AD, GQ, PL, DP fields
|
||||
* easy and fast
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 6/12
|
||||
*/
|
||||
class IntGenotypeFieldAccessors {
|
||||
// initialized once per writer to allow parallel writers to work
|
||||
private final HashMap<String, Accessor> intGenotypeFieldEncoders = new HashMap<String, Accessor>();
|
||||
|
||||
public IntGenotypeFieldAccessors() {
|
||||
intGenotypeFieldEncoders.put(VCFConstants.DEPTH_KEY, new IntGenotypeFieldAccessors.DPAccessor());
|
||||
intGenotypeFieldEncoders.put(VCFConstants.GENOTYPE_ALLELE_DEPTHS, new IntGenotypeFieldAccessors.ADAccessor());
|
||||
intGenotypeFieldEncoders.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, new IntGenotypeFieldAccessors.PLAccessor());
|
||||
intGenotypeFieldEncoders.put(VCFConstants.GENOTYPE_QUALITY_KEY, new IntGenotypeFieldAccessors.GQAccessor());
|
||||
}
|
||||
|
||||
/**
|
||||
* Return an accessor for field, or null if none exists
|
||||
* @param field
|
||||
* @return
|
||||
*/
|
||||
public Accessor getAccessor(final String field) {
|
||||
return intGenotypeFieldEncoders.get(field);
|
||||
}
|
||||
|
||||
public static abstract class Accessor {
|
||||
public abstract int[] getValues(final Genotype g);
|
||||
|
||||
public final int getSize(final Genotype g) {
|
||||
final int[] v = getValues(g);
|
||||
return v == null ? 0 : v.length;
|
||||
}
|
||||
}
|
||||
|
||||
private static abstract class AtomicAccessor extends Accessor {
|
||||
private final int[] singleton = new int[1];
|
||||
|
||||
@Override
|
||||
public int[] getValues(final Genotype g) {
|
||||
singleton[0] = getValue(g);
|
||||
return singleton[0] == -1 ? null : singleton;
|
||||
}
|
||||
|
||||
public abstract int getValue(final Genotype g);
|
||||
}
|
||||
|
||||
public static class GQAccessor extends AtomicAccessor {
|
||||
@Override public int getValue(final Genotype g) { return Math.min(g.getGQ(), VCFConstants.MAX_GENOTYPE_QUAL); }
|
||||
}
|
||||
|
||||
public static class DPAccessor extends AtomicAccessor {
|
||||
@Override public int getValue(final Genotype g) { return g.getDP(); }
|
||||
}
|
||||
|
||||
public static class ADAccessor extends Accessor {
|
||||
@Override public int[] getValues(final Genotype g) { return g.getAD(); }
|
||||
}
|
||||
|
||||
public static class PLAccessor extends Accessor {
|
||||
@Override public int[] getValues(final Genotype g) { return g.getPL(); }
|
||||
}
|
||||
}
|
||||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.variantcontext.writer;
|
|||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
|
@ -53,28 +54,7 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
// were filters applied?
|
||||
protected boolean filtersWereAppliedToContext = false;
|
||||
|
||||
// /**
|
||||
// * create a VCF writer, given a file to write to
|
||||
// *
|
||||
// * @param location the file location to write to
|
||||
// */
|
||||
// public StandardVCFWriter(final File location, final SAMSequenceDictionary refDict) {
|
||||
// this(location, openOutputStream(location), refDict, true, false);
|
||||
// }
|
||||
//
|
||||
// public StandardVCFWriter(File location, final SAMSequenceDictionary refDict, boolean enableOnTheFlyIndexing) {
|
||||
// this(location, openOutputStream(location), refDict, enableOnTheFlyIndexing, false);
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * create a VCF writer, given a stream to write to
|
||||
// *
|
||||
// * @param output the file location to write to
|
||||
// * @param doNotWriteGenotypes do not write genotypes
|
||||
// */
|
||||
// public StandardVCFWriter(final OutputStream output, final SAMSequenceDictionary refDict, final boolean doNotWriteGenotypes) {
|
||||
// this(null, output, refDict, false, doNotWriteGenotypes);
|
||||
// }
|
||||
private IntGenotypeFieldAccessors intGenotypeFieldAccessors = new IntGenotypeFieldAccessors();
|
||||
|
||||
public VCFWriter(final File location, final OutputStream output, final SAMSequenceDictionary refDict, final boolean enableOnTheFlyIndexing, boolean doNotWriteGenotypes) {
|
||||
super(writerName(location, output), location, output, refDict, enableOnTheFlyIndexing);
|
||||
|
|
@ -230,7 +210,7 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
if ( !vc.hasLog10PError() )
|
||||
mWriter.write(VCFConstants.MISSING_VALUE_v4);
|
||||
else
|
||||
mWriter.write(getQualValue(vc.getPhredScaledQual()));
|
||||
mWriter.write(formatQualValue(vc.getPhredScaledQual()));
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
||||
// FILTER
|
||||
|
|
@ -250,7 +230,7 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
|
||||
// FORMAT
|
||||
final GenotypesContext gc = vc.getGenotypes();
|
||||
if ( gc instanceof LazyGenotypesContext && ((LazyGenotypesContext)gc).getUnparsedGenotypeData() != null) {
|
||||
if ( gc.isLazyWithData() && ((LazyGenotypesContext)gc).getUnparsedGenotypeData() instanceof String ) {
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
mWriter.write(((LazyGenotypesContext)gc).getUnparsedGenotypeData().toString());
|
||||
} else {
|
||||
|
|
@ -272,7 +252,7 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
}
|
||||
}
|
||||
|
||||
public static Map<Allele, String> buildAlleleMap(final VariantContext vc) {
|
||||
private static Map<Allele, String> buildAlleleMap(final VariantContext vc) {
|
||||
final Map<Allele, String> alleleMap = new HashMap<Allele, String>(vc.getAlleles().size()+1);
|
||||
alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup
|
||||
|
||||
|
|
@ -298,10 +278,13 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
return vc.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(vc.getFilters())) : (forcePASS || vc.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
|
||||
}
|
||||
|
||||
private String getQualValue(double qual) {
|
||||
String s = String.format(VCFConstants.DOUBLE_PRECISION_FORMAT_STRING, qual);
|
||||
if ( s.endsWith(VCFConstants.DOUBLE_PRECISION_INT_SUFFIX) )
|
||||
s = s.substring(0, s.length() - VCFConstants.DOUBLE_PRECISION_INT_SUFFIX.length());
|
||||
private static final String QUAL_FORMAT_STRING = "%.2f";
|
||||
private static final String QUAL_FORMAT_EXTENSION_TO_TRIM = ".00";
|
||||
|
||||
private String formatQualValue(double qual) {
|
||||
String s = String.format(QUAL_FORMAT_STRING, qual);
|
||||
if ( s.endsWith(QUAL_FORMAT_EXTENSION_TO_TRIM) )
|
||||
s = s.substring(0, s.length() - QUAL_FORMAT_EXTENSION_TO_TRIM.length());
|
||||
return s;
|
||||
}
|
||||
|
||||
|
|
@ -347,6 +330,13 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
*/
|
||||
private void addGenotypeData(VariantContext vc, Map<Allele, String> alleleMap, List<String> genotypeFormatKeys)
|
||||
throws IOException {
|
||||
if ( ! mHeader.getGenotypeSamples().containsAll(vc.getSampleNames()) ) {
|
||||
final List<String> badSampleNames = new ArrayList<String>();
|
||||
for ( final Genotype g : vc.getGenotypes() )
|
||||
if ( ! mHeader.getGenotypeSamples().contains(g.getSampleName()) )
|
||||
badSampleNames.add(g.getSampleName());
|
||||
throw new ReviewedStingException("BUG: VariantContext contains some samples not in the VCF header: bad samples are " + Utils.join(",",badSampleNames));
|
||||
}
|
||||
|
||||
for ( String sample : mHeader.getGenotypeSamples() ) {
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
|
@ -360,9 +350,9 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
}
|
||||
|
||||
List<String> attrs = new ArrayList<String>(genotypeFormatKeys.size());
|
||||
for ( String key : genotypeFormatKeys ) {
|
||||
for ( String field : genotypeFormatKeys ) {
|
||||
|
||||
if ( key.equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||
if ( !g.isAvailable() ) {
|
||||
throw new ReviewedStingException("GTs cannot be missing for some samples if they are available for others in the record");
|
||||
}
|
||||
|
|
@ -376,36 +366,50 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
continue;
|
||||
}
|
||||
|
||||
Object val = g.hasAttribute(key) ? g.getAttribute(key) : VCFConstants.MISSING_VALUE_v4;
|
||||
|
||||
// some exceptions
|
||||
if ( key.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
|
||||
if ( ! g.hasLog10PError() )
|
||||
val = VCFConstants.MISSING_VALUE_v4;
|
||||
String outputValue;
|
||||
final IntGenotypeFieldAccessors.Accessor accessor = intGenotypeFieldAccessors.getAccessor(field);
|
||||
if ( accessor != null ) {
|
||||
final int[] intValues = accessor.getValues(g);
|
||||
if ( intValues == null )
|
||||
outputValue = VCFConstants.MISSING_VALUE_v4;
|
||||
else if ( intValues.length == 1 ) // fast path
|
||||
outputValue = Integer.toString(intValues[0]);
|
||||
else {
|
||||
val = getQualValue(Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL));
|
||||
}
|
||||
} else if ( key.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
|
||||
val = g.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(g.getFilters())) : (g.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
|
||||
}
|
||||
|
||||
VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(key);
|
||||
if ( metaData != null ) {
|
||||
int numInFormatField = metaData.getCount(vc.getAlternateAlleles().size());
|
||||
if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) {
|
||||
// If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
|
||||
// For example, if Number=2, the string has to be ".,."
|
||||
StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4);
|
||||
for ( int i = 1; i < numInFormatField; i++ ) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append(intValues[0]);
|
||||
for ( int i = 1; i < intValues.length; i++) {
|
||||
sb.append(",");
|
||||
sb.append(VCFConstants.MISSING_VALUE_v4);
|
||||
sb.append(intValues[i]);
|
||||
}
|
||||
val = sb.toString();
|
||||
outputValue = sb.toString();
|
||||
}
|
||||
} else {
|
||||
Object val = g.hasExtendedAttribute(field) ? g.getExtendedAttribute(field) : VCFConstants.MISSING_VALUE_v4;
|
||||
|
||||
// some exceptions
|
||||
if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY ) ) {
|
||||
val = g.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(g.getFilters())) : (g.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
|
||||
}
|
||||
|
||||
VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(field);
|
||||
if ( metaData != null ) {
|
||||
int numInFormatField = metaData.getCount(vc.getAlternateAlleles().size());
|
||||
if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) {
|
||||
// If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
|
||||
// For example, if Number=2, the string has to be ".,."
|
||||
StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4);
|
||||
for ( int i = 1; i < numInFormatField; i++ ) {
|
||||
sb.append(",");
|
||||
sb.append(VCFConstants.MISSING_VALUE_v4);
|
||||
}
|
||||
val = sb.toString();
|
||||
}
|
||||
}
|
||||
|
||||
// assume that if key is absent, then the given string encoding suffices
|
||||
outputValue = formatVCFField(val);
|
||||
}
|
||||
|
||||
// assume that if key is absent, then the given string encoding suffices
|
||||
String outputValue = formatVCFField(val);
|
||||
if ( outputValue != null )
|
||||
attrs.add(outputValue);
|
||||
}
|
||||
|
|
@ -438,12 +442,41 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
mWriter.write(encoding);
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes a double value and pretty prints it to a String for display
|
||||
*
|
||||
* Large doubles => gets %.2f style formatting
|
||||
* Doubles < 1 / 10 but > 1/100 </>=> get %.3f style formatting
|
||||
* Double < 1/100 => %.3e formatting
|
||||
* @param d
|
||||
* @return
|
||||
*/
|
||||
public static final String formatVCFDouble(final double d) {
|
||||
String format;
|
||||
if ( d < 1 ) {
|
||||
if ( d < 0.01 ) {
|
||||
if ( Math.abs(d) >= 1e-20 )
|
||||
format = "%.3e";
|
||||
else {
|
||||
// return a zero format
|
||||
return "0.00";
|
||||
}
|
||||
} else {
|
||||
format = "%.3f";
|
||||
}
|
||||
} else {
|
||||
format = "%.2f";
|
||||
}
|
||||
|
||||
return String.format(format, d);
|
||||
}
|
||||
|
||||
public static String formatVCFField(Object val) {
|
||||
String result;
|
||||
if ( val == null )
|
||||
result = VCFConstants.MISSING_VALUE_v4;
|
||||
else if ( val instanceof Double )
|
||||
result = String.format(VCFConstants.DOUBLE_PRECISION_FORMAT_STRING, (Double)val);
|
||||
result = formatVCFDouble((Double) val);
|
||||
else if ( val instanceof Boolean )
|
||||
result = (Boolean)val ? "" : null; // empty string for true, null for false
|
||||
else if ( val instanceof List ) {
|
||||
|
|
@ -475,21 +508,24 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
boolean sawGoodGT = false;
|
||||
boolean sawGoodQual = false;
|
||||
boolean sawGenotypeFilter = false;
|
||||
boolean sawDP = false;
|
||||
boolean sawAD = false;
|
||||
boolean sawPL = false;
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
keys.addAll(g.getAttributes().keySet());
|
||||
if ( g.isAvailable() )
|
||||
sawGoodGT = true;
|
||||
if ( g.hasLog10PError() )
|
||||
sawGoodQual = true;
|
||||
if (g.isFiltered() && g.isCalled())
|
||||
sawGenotypeFilter = true;
|
||||
keys.addAll(g.getExtendedAttributes().keySet());
|
||||
if ( g.isAvailable() ) sawGoodGT = true;
|
||||
if ( g.hasGQ() ) sawGoodQual = true;
|
||||
if ( g.hasDP() ) sawDP = true;
|
||||
if ( g.hasAD() ) sawAD = true;
|
||||
if ( g.hasPL() ) sawPL = true;
|
||||
if (g.isFiltered() && g.isCalled()) sawGenotypeFilter = true;
|
||||
}
|
||||
|
||||
if ( sawGoodQual )
|
||||
keys.add(VCFConstants.GENOTYPE_QUALITY_KEY);
|
||||
|
||||
if (sawGenotypeFilter)
|
||||
keys.add(VCFConstants.GENOTYPE_FILTER_KEY);
|
||||
if ( sawGoodQual ) keys.add(VCFConstants.GENOTYPE_QUALITY_KEY);
|
||||
if ( sawDP ) keys.add(VCFConstants.DEPTH_KEY);
|
||||
if ( sawAD ) keys.add(VCFConstants.GENOTYPE_ALLELE_DEPTHS);
|
||||
if ( sawPL ) keys.add(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
|
||||
if ( sawGenotypeFilter ) keys.add(VCFConstants.GENOTYPE_FILTER_KEY);
|
||||
|
||||
List<String> sortedList = ParsingUtils.sortList(new ArrayList<String>(keys));
|
||||
|
||||
|
|
|
|||
|
|
@ -87,8 +87,10 @@ public abstract class BaseTest {
|
|||
private static final String networkTempDir;
|
||||
private static final File networkTempDirFile;
|
||||
|
||||
public static final File testDirFile = new File("public/testdata/");
|
||||
protected static final String testDirRelative = "public/testdata/";
|
||||
public static final File testDirFile = new File(testDirRelative);
|
||||
public static final String testDir = testDirFile.getAbsolutePath() + "/";
|
||||
protected static final String testDirRoot = testDir.replace(testDirRelative, "");
|
||||
|
||||
public static final String keysDataLocation = validationDataLocation + "keys/";
|
||||
public static final String gatkKeyFile = CryptUtils.GATK_USER_KEY_DIRECTORY + "gsamembers_broadinstitute.org.key";
|
||||
|
|
|
|||
|
|
@ -25,14 +25,13 @@
|
|||
package org.broadinstitute.sting;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.walkers.diffengine.DiffEngine;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.testng.Assert;
|
||||
|
||||
import java.io.*;
|
||||
import java.math.BigInteger;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
|
|
@ -44,14 +43,46 @@ import java.util.Arrays;
|
|||
* Utilities for manipulating the MD5 database of previous results
|
||||
*/
|
||||
public class MD5DB {
|
||||
public static final Logger logger = Logger.getLogger(MD5DB.class);
|
||||
|
||||
/**
|
||||
* Subdirectory under the ant build directory where we store integration test md5 results
|
||||
*/
|
||||
private static final int MAX_RECORDS_TO_READ = 10000;
|
||||
private static final int MAX_RAW_DIFFS_TO_SUMMARIZE = 1000;
|
||||
private static final int MAX_RECORDS_TO_READ = 100000;
|
||||
private static final int MAX_RAW_DIFFS_TO_SUMMARIZE = -1;
|
||||
public static final String LOCAL_MD5_DB_DIR = "integrationtests";
|
||||
public static final String GLOBAL_MD5_DB_DIR = "/humgen/gsa-hpprojects/GATK/data/integrationtests";
|
||||
|
||||
// tracking and emitting a data file of original and new md5s
|
||||
private final File MD5MismatchesFile;
|
||||
private final PrintStream md5MismatchStream;
|
||||
|
||||
/**
 * Creates an MD5 database whose mismatch records are written to
 * md5mismatches.txt under LOCAL_MD5_DB_DIR.
 */
public MD5DB() {
    this(new File(LOCAL_MD5_DB_DIR + "/md5mismatches.txt"));
}
|
||||
|
||||
public MD5DB(final File MD5MismatchesFile) {
|
||||
this.MD5MismatchesFile = MD5MismatchesFile;
|
||||
|
||||
ensureMd5DbDirectory();
|
||||
|
||||
logger.debug("Creating md5 mismatch db at " + MD5MismatchesFile);
|
||||
try {
|
||||
md5MismatchStream = new PrintStream(new FileOutputStream(MD5MismatchesFile));
|
||||
md5MismatchStream.printf("%s\t%s\t%s%n", "expected", "observed", "test");
|
||||
} catch ( FileNotFoundException e ) {
|
||||
throw new ReviewedStingException("Failed to open md5 mismatch file", e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public void close() {
|
||||
if ( md5MismatchStream != null ) {
|
||||
logger.debug("Closeing md5 mismatch db at " + MD5MismatchesFile);
|
||||
md5MismatchStream.close();
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// MD5 DB stuff
|
||||
|
|
@ -61,7 +92,7 @@ public class MD5DB {
|
|||
/**
|
||||
* Create the MD5 file directories if necessary
|
||||
*/
|
||||
protected static void ensureMd5DbDirectory() {
|
||||
private void ensureMd5DbDirectory() {
|
||||
File dir = new File(LOCAL_MD5_DB_DIR);
|
||||
if ( ! dir.exists() ) {
|
||||
System.out.printf("##### Creating MD5 db %s%n", LOCAL_MD5_DB_DIR);
|
||||
|
|
@ -79,7 +110,7 @@ public class MD5DB {
|
|||
* @param valueIfNotFound
|
||||
* @return
|
||||
*/
|
||||
public static String getMD5FilePath(final String md5, final String valueIfNotFound) {
|
||||
public String getMD5FilePath(final String md5, final String valueIfNotFound) {
|
||||
// we prefer the global db to the local DB, so match it first
|
||||
for ( String dir : Arrays.asList(GLOBAL_MD5_DB_DIR, LOCAL_MD5_DB_DIR)) {
|
||||
File f = getFileForMD5(md5, dir);
|
||||
|
|
@ -99,7 +130,7 @@ public class MD5DB {
|
|||
* @param dbPath
|
||||
* @return
|
||||
*/
|
||||
private static File getFileForMD5(final String md5, final String dbPath) {
|
||||
private File getFileForMD5(final String md5, final String dbPath) {
|
||||
final String basename = String.format("%s.integrationtest", md5);
|
||||
return new File(dbPath + "/" + basename);
|
||||
}
|
||||
|
|
@ -110,7 +141,7 @@ public class MD5DB {
|
|||
* @param md5
|
||||
* @param resultsFile
|
||||
*/
|
||||
private static void updateMD5Db(final String md5, final File resultsFile) {
|
||||
private void updateMD5Db(final String md5, final File resultsFile) {
|
||||
copyFileToDB(getFileForMD5(md5, LOCAL_MD5_DB_DIR), resultsFile);
|
||||
copyFileToDB(getFileForMD5(md5, GLOBAL_MD5_DB_DIR), resultsFile);
|
||||
}
|
||||
|
|
@ -120,7 +151,7 @@ public class MD5DB {
|
|||
* @param dbFile
|
||||
* @param resultsFile
|
||||
*/
|
||||
private static void copyFileToDB(File dbFile, final File resultsFile) {
|
||||
private void copyFileToDB(File dbFile, final File resultsFile) {
|
||||
if ( ! dbFile.exists() ) {
|
||||
// the file isn't already in the db, copy it over
|
||||
System.out.printf("##### Updating MD5 file: %s%n", dbFile.getPath());
|
||||
|
|
@ -192,7 +223,7 @@ public class MD5DB {
|
|||
* @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text.
|
||||
* @return The calculated MD5.
|
||||
*/
|
||||
public static MD5Match assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) {
|
||||
public MD5Match assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) {
|
||||
final String actualMD5 = testFileMD5(name, resultsFile, expectedMD5, parameterize);
|
||||
String failMessage = null;
|
||||
boolean failed = false;
|
||||
|
|
@ -218,7 +249,7 @@ public class MD5DB {
|
|||
* @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text.
|
||||
* @return The calculated MD5.
|
||||
*/
|
||||
public static String testFileMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) {
|
||||
public String testFileMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) {
|
||||
try {
|
||||
byte[] bytesOfMessage = getBytesFromFile(resultsFile);
|
||||
byte[] thedigest = MessageDigest.getInstance("MD5").digest(bytesOfMessage);
|
||||
|
|
@ -247,11 +278,13 @@ public class MD5DB {
|
|||
BaseTest.log(String.format("calculated %s", filemd5sum));
|
||||
BaseTest.log(String.format("diff %s %s", pathToExpectedMD5File, pathToFileMD5File));
|
||||
|
||||
md5MismatchStream.printf("%s\t%s\t%s%n", expectedMD5, filemd5sum, name);
|
||||
md5MismatchStream.flush();
|
||||
|
||||
// inline differences
|
||||
// TODO -- capture output and put in log
|
||||
final ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
final PrintStream ps = new PrintStream(baos);
|
||||
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(ps, 20, 10, 0, MAX_RAW_DIFFS_TO_SUMMARIZE);
|
||||
DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(ps, 20, 10, 0, MAX_RAW_DIFFS_TO_SUMMARIZE, false);
|
||||
boolean success = DiffEngine.simpleDiffFiles(new File(pathToExpectedMD5File), new File(pathToFileMD5File), MAX_RECORDS_TO_READ, params);
|
||||
if ( success ) {
|
||||
final String content = baos.toString();
|
||||
|
|
|
|||
|
|
@ -40,10 +40,13 @@ import org.broadinstitute.sting.utils.collections.Pair;
|
|||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextTestProvider;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.AfterSuite;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
|
||||
import java.io.File;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -52,13 +55,26 @@ public class WalkerTest extends BaseTest {
|
|||
private static final boolean ENABLE_PHONE_HOME_FOR_TESTS = false;
|
||||
private static final boolean ENABLE_ON_THE_FLY_CHECK_FOR_VCF_INDEX = false;
|
||||
|
||||
private static MD5DB md5DB = new MD5DB();
|
||||
|
||||
@BeforeMethod
|
||||
public void initializeRandomGenerator() {
|
||||
public void initializeWalkerTests() {
|
||||
logger.debug("Initializing walker tests");
|
||||
GenomeAnalysisEngine.resetRandomGenerator();
|
||||
}
|
||||
|
||||
@AfterSuite
|
||||
public void finalizeWalkerTests() {
|
||||
logger.debug("Finalizing walker tests");
|
||||
md5DB.close();
|
||||
}
|
||||
|
||||
public static MD5DB getMd5DB() {
|
||||
return md5DB;
|
||||
}
|
||||
|
||||
public MD5DB.MD5Match assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5) {
|
||||
return MD5DB.assertMatchingMD5(name, resultsFile, expectedMD5, parameterize());
|
||||
return getMd5DB().assertMatchingMD5(name, resultsFile, expectedMD5, parameterize());
|
||||
}
|
||||
|
||||
public void validateOutputBCFIfPossible(final String name, final File resultFile) {
|
||||
|
|
@ -67,6 +83,7 @@ public class WalkerTest extends BaseTest {
|
|||
logger.warn("Checking shadow BCF output file " + bcfFile + " against VCF file " + resultFile);
|
||||
try {
|
||||
VariantContextTestProvider.assertVCFandBCFFilesAreTheSame(resultFile, bcfFile);
|
||||
logger.warn(" Shadow BCF PASSED!");
|
||||
} catch ( Exception e ) {
|
||||
Assert.fail("Exception received reading shadow BCFFile " + bcfFile + " for test " + name, e);
|
||||
}
|
||||
|
|
@ -103,9 +120,9 @@ public class WalkerTest extends BaseTest {
|
|||
|
||||
for (int i = 0; i < resultFiles.size(); i++) {
|
||||
MD5DB.MD5Match result = assertMatchingMD5(name, resultFiles.get(i), expectedMD5s.get(i));
|
||||
validateOutputBCFIfPossible(name, resultFiles.get(i));
|
||||
if ( ! result.failed ) {
|
||||
validateOutputIndex(name, resultFiles.get(i));
|
||||
validateOutputBCFIfPossible(name, resultFiles.get(i));
|
||||
md5s.add(result.expectedMD5);
|
||||
} else {
|
||||
fails.add(result);
|
||||
|
|
@ -256,8 +273,6 @@ public class WalkerTest extends BaseTest {
|
|||
}
|
||||
|
||||
protected Pair<List<File>, List<String>> executeTest(final String name, WalkerTestSpec spec) {
|
||||
MD5DB.ensureMd5DbDirectory(); // ensure the md5 directory exists
|
||||
|
||||
List<File> tmpFiles = new ArrayList<File>();
|
||||
for (int i = 0; i < spec.nOutputFiles; i++) {
|
||||
String ext = spec.exts == null ? ".tmp" : "." + spec.exts.get(i);
|
||||
|
|
@ -337,8 +352,11 @@ public class WalkerTest extends BaseTest {
|
|||
boolean gotAnException = false;
|
||||
try {
|
||||
final String now = new SimpleDateFormat("HH:mm:ss").format(new Date());
|
||||
System.out.println(String.format("[%s] Executing test %s with GATK arguments: %s",
|
||||
now, name, Utils.join(" ",command)));
|
||||
final String cmdline = Utils.join(" ",command);
|
||||
System.out.println(String.format("[%s] Executing test %s with GATK arguments: %s", now, name, cmdline));
|
||||
// also write the command line to the HTML log for convenient follow-up
|
||||
// do the replaceAll so paths become relative to the current working directory
|
||||
BaseTest.log(cmdline.replaceAll(testDirRoot, ""));
|
||||
CommandLineExecutable.start(instance, command);
|
||||
} catch (Exception e) {
|
||||
gotAnException = true;
|
||||
|
|
|
|||
|
|
@ -764,23 +764,6 @@ public class ParsingEngineUnitTest extends BaseTest {
|
|||
Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void variantContextBindingArgumentTestVCF3() {
|
||||
final String[] commandLine = new String[] {"-V:vcf3",NON_EXISTANT_FILENAME_VCF};
|
||||
|
||||
parsingEngine.addArgumentSource( VariantContextRodBindingArgProvider.class );
|
||||
parsingEngine.parse( commandLine );
|
||||
parsingEngine.validate();
|
||||
|
||||
VariantContextRodBindingArgProvider argProvider = new VariantContextRodBindingArgProvider();
|
||||
parsingEngine.loadArgumentsIntoObject( argProvider );
|
||||
|
||||
Assert.assertEquals(argProvider.binding.getName(), "binding", "Name isn't set properly");
|
||||
Assert.assertEquals(argProvider.binding.getSource(), NON_EXISTANT_FILENAME_VCF, "Source isn't set to its expected value");
|
||||
Assert.assertEquals(argProvider.binding.getType(), VariantContext.class, "Type isn't set to its expected value");
|
||||
Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set");
|
||||
}
|
||||
|
||||
private class ListRodBindingArgProvider {
|
||||
@Input(fullName = "binding", shortName="V", required=false)
|
||||
public List<RodBinding<Feature>> bindings;
|
||||
|
|
|
|||
|
|
@ -47,10 +47,6 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
|
|||
testBadRODBindingInput("beagle", "BEAGLE input to VCF expecting walker", UserException.BadArgumentValue.class);
|
||||
}
|
||||
|
||||
@Test() private void testBadRODBindingInputType2() {
|
||||
testBadRODBindingInput("vcf3", "VCF3 input to VCF expecting walker", UserException.class);
|
||||
}
|
||||
|
||||
@Test() private void testBadRODBindingInputType3() {
|
||||
testBadRODBindingInput("bed", "Bed input to VCF expecting walker", UserException.BadArgumentValue.class);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(b36KGReference, "symbolic_alleles_1.vcf"),
|
||||
1,
|
||||
Arrays.asList("444a20659f67592a8284e0b7849e4302"));
|
||||
Arrays.asList("c79137da24ad4dc15cedc742de39247f"));
|
||||
executeTest("Test symbolic alleles", spec);
|
||||
}
|
||||
|
||||
|
|
@ -33,7 +33,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(b36KGReference, "symbolic_alleles_2.vcf"),
|
||||
1,
|
||||
Arrays.asList("93a24c019663a6011b4d6de12538df11"));
|
||||
Arrays.asList("3f6cbbd5fdf164d87081a3af19eeeba7"));
|
||||
executeTest("Test symbolic alleles mixed in with non-symbolic alleles", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsNotAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --variant:VCF3 " + testDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("5720826c2bf6cbc762e4a888ef58c3f2"));
|
||||
Arrays.asList("dfa5dff09fa964b06da19c0f4aff6928"));
|
||||
executeTest("test file has annotations, not asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -24,7 +24,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsNotAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --variant:VCF3 " + testDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("088e5db7d8de6606cd562885fa47f3b2"));
|
||||
Arrays.asList("9914bd19f6235c550e5182e0f4591da6"));
|
||||
executeTest("test file has annotations, not asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -32,7 +32,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant:VCF3 " + testDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("37fd6826db907f80d4631bae1b629da4"));
|
||||
Arrays.asList("6a52ef10bb10d72cdd82a8f7afc2dd09"));
|
||||
executeTest("test file has annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -40,7 +40,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant:VCF3 " + testDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("8a85c20b219a8bb286df3c9f4e1cdc8c"));
|
||||
Arrays.asList("74d894fd31b449deffca88d0e465f01b"));
|
||||
executeTest("test file has annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -48,7 +48,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsNotAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --variant:VCF3 " + testDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("da446d3a3e9aefa7537b65b5adc3609b"));
|
||||
Arrays.asList("dd89dfa22f0e1d6760095e04f528d62a"));
|
||||
executeTest("test file doesn't have annotations, not asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -58,7 +58,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
// they don't get reordered. It's a good test of the genotype ordering system.
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --variant:VCF3 " + testDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("04c71d90e3df9d519160636ceb0f02b9"));
|
||||
Arrays.asList("542d9ed8290ef7868387af4127e0b5fa"));
|
||||
executeTest("test file doesn't have annotations, not asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant:VCF3 " + testDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("6d64723c808a3dd774ed06e228f9c63d"));
|
||||
Arrays.asList("b1b32ed3b831c92c94258c8e4a60e8c9"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant:VCF3 " + testDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("153a23b2fa4eb0ee288e4bb2f0fc4bf8"));
|
||||
Arrays.asList("a25eacb0ceea2c082af349f8d7776c8a"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -82,7 +82,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testExcludeAnnotations() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant:VCF3 " + testDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("a28a503ab204474ecee306c9eceb1060"));
|
||||
Arrays.asList("ef046909a6f6c6cb43653a255a99a014"));
|
||||
executeTest("test exclude annotations", spec);
|
||||
}
|
||||
|
||||
|
|
@ -90,7 +90,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testOverwritingHeader() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + testDir + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
|
||||
Arrays.asList("1d98be77dad9c703402de0315db5176a"));
|
||||
Arrays.asList("5c2fded3b6a96b0b0788086bbb2409ed"));
|
||||
executeTest("test overwriting header", spec);
|
||||
}
|
||||
|
||||
|
|
@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoReads() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + testDir + "vcfexample3empty.vcf -L " + testDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("ea6201db7c1fd5cb9cc3110a3396c646"));
|
||||
Arrays.asList("c590088d85edce786604fd600f5d5e75"));
|
||||
executeTest("not passing it any reads", spec);
|
||||
}
|
||||
|
||||
|
|
@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testDBTagWithDbsnp() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --dbsnp " + b36dbSNP129 + " -G Standard --variant " + testDir + "vcfexample3empty.vcf -L " + testDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("5103b9d9857530dc0ccdb8ca0a1db8c3"));
|
||||
Arrays.asList("ade9354a4cdd6cc92c169f252fb36f3f"));
|
||||
executeTest("getting DB tag with dbSNP", spec);
|
||||
}
|
||||
|
||||
|
|
@ -114,7 +114,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testMultipleIdsWithDbsnp() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --alwaysAppendDbsnpId --dbsnp " + b36dbSNP129 + " -G Standard --variant " + testDir + "vcfexample3withIDs.vcf -L " + testDir + "vcfexample3withIDs.vcf", 1,
|
||||
Arrays.asList("d519c21ab0ae901d39856fea7e0e9d83"));
|
||||
Arrays.asList("f496f40e1e9efa743e3b473f6fe6e6d3"));
|
||||
executeTest("adding multiple IDs with dbSNP", spec);
|
||||
}
|
||||
|
||||
|
|
@ -122,7 +122,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testDBTagWithHapMap() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --comp:H3 " + testDir + "fakeHM3.vcf -G Standard --variant " + testDir + "vcfexample3empty.vcf -L " + testDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("746f3a431c6491b85dd6fcf75065550f"));
|
||||
Arrays.asList("d383fbd741d604625c9507d4da1c5a27"));
|
||||
executeTest("getting DB tag with HM3", spec);
|
||||
}
|
||||
|
||||
|
|
@ -130,7 +130,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoQuals() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --variant " + testDir + "noQual.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L " + testDir + "noQual.vcf -A QualByDepth", 1,
|
||||
Arrays.asList("7ce09a89e72ee95f21313e496311068a"));
|
||||
Arrays.asList("4a247f039dfb16ac05b38a0dd5f98da6"));
|
||||
executeTest("test file doesn't have QUALs", spec);
|
||||
}
|
||||
|
||||
|
|
@ -138,7 +138,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testUsingExpression() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --resource:foo " + testDir + "targetAnnotations.vcf -G Standard --variant:VCF3 " + testDir + "vcfexample3empty.vcf -E foo.AF -L " + testDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("accce2796a967d05d756e1b5adecd6d2"));
|
||||
Arrays.asList("067792efcffea93ade632e52a80d0d8f"));
|
||||
executeTest("using expression", spec);
|
||||
}
|
||||
|
||||
|
|
@ -146,13 +146,13 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testUsingExpressionWithID() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --resource:foo " + testDir + "targetAnnotations.vcf -G Standard --variant:VCF3 " + testDir + "vcfexample3empty.vcf -E foo.ID -L " + testDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("9a37502ab929ac3d5a829467f5612853"));
|
||||
Arrays.asList("66c68deb0508348324eb47d524e756de"));
|
||||
executeTest("using expression with ID", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTabixAnnotations() {
|
||||
final String MD5 = "bb9a148716fc69d706c5be146c1afa00";
|
||||
final String MD5 = "5aebcf8f76c649d645708b1262185c80";
|
||||
for ( String file : Arrays.asList("CEU.exon.2010_03.sites.vcf", "CEU.exon.2010_03.sites.vcf.gz")) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -A HomopolymerRun --variant:vcf " + validationDataLocation + file + " -L " + validationDataLocation + "CEU.exon.2010_03.sites.vcf --no_cmdline_in_header", 1,
|
||||
|
|
@ -168,7 +168,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation +
|
||||
"snpEff2.0.5.AFR.unfiltered.vcf -L 1:1-1,500,000 -L 2:232,325,429",
|
||||
1,
|
||||
Arrays.asList("bef7201d9c79facbecba15d4abcc684b")
|
||||
Arrays.asList("0c20cda1cf0b903a287f1807ae5bee02")
|
||||
);
|
||||
executeTest("Testing SnpEff annotations", spec);
|
||||
}
|
||||
|
|
@ -187,7 +187,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testTDTAnnotation() {
|
||||
final String MD5 = "900e9d82ea3127aa06e676cf50b341f6";
|
||||
final String MD5 = "81f85f0ce8cc36df7c717c478e100ba1";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T VariantAnnotator -R " + b37KGReference + " -A TransmissionDisequilibriumTest --variant:vcf " + testDir + "ug.random50000.subset300bp.chr1.family.vcf" +
|
||||
" -L " + testDir + "ug.random50000.subset300bp.chr1.family.vcf --no_cmdline_in_header -ped " + testDir + "ug.random50000.family.ped -o %s", 1,
|
||||
|
|
@ -198,7 +198,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testChromosomeCountsPed() {
|
||||
final String MD5 = "7fe0e9df2d9fb375beb7cf23afdb4c87";
|
||||
final String MD5 = "9830fe2247651377e68ad0b0894e9a4e";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T VariantAnnotator -R " + b37KGReference + " -A ChromosomeCounts --variant:vcf " + testDir + "ug.random50000.subset300bp.chr1.family.vcf" +
|
||||
" -L " + testDir + "ug.random50000.subset300bp.chr1.family.vcf --no_cmdline_in_header -ped " + testDir + "ug.random50000.family.ped -o %s", 1,
|
||||
|
|
@ -208,7 +208,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testInbreedingCoeffPed() {
|
||||
final String MD5 = "7aaf0033a823bbf9066b43764d8dd660";
|
||||
final String MD5 = "e94d589b5691e3ecfd9cc9475a384890";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T VariantAnnotator -R " + b37KGReference + " -A InbreedingCoeff --variant:vcf " + testDir + "ug.random50000.subset300bp.chr1.family.vcf" +
|
||||
" -L " + testDir + "ug.random50000.subset300bp.chr1.family.vcf --no_cmdline_in_header -ped " + testDir + "ug.random50000.family.ped -o %s", 1,
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ public class BeagleIntegrationTest extends WalkerTest {
|
|||
"--beagleR2:BEAGLE " + beagleValidationDataLocation + "inttestbgl.r2 " +
|
||||
"--beagleProbs:BEAGLE " + beagleValidationDataLocation + "inttestbgl.gprobs " +
|
||||
"--beaglePhased:BEAGLE " + beagleValidationDataLocation + "inttestbgl.phased " +
|
||||
"-o %s --no_cmdline_in_header", 1, Arrays.asList("7fd0d0c2d1af3b16378339c181e40611"));
|
||||
"-o %s --no_cmdline_in_header", 1, Arrays.asList("cdbf8cc557f5be9ac778e52338c0d906"));
|
||||
executeTest("test BeagleOutputToVCF", spec);
|
||||
}
|
||||
|
||||
|
|
@ -50,7 +50,7 @@ public class BeagleIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T ProduceBeagleInput -R " + hg19Reference + " " +
|
||||
"--variant:VCF3 " + beagleValidationDataLocation + "inttestbgl.input.vcf " +
|
||||
"-o %s", 1, Arrays.asList("a01c704246f3dd1b9c65774007e51e69"));
|
||||
"-o %s", 1, Arrays.asList("f301b089d21da259873f04bdc468835d"));
|
||||
executeTest("test BeagleInput", spec);
|
||||
}
|
||||
|
||||
|
|
@ -60,7 +60,7 @@ public class BeagleIntegrationTest extends WalkerTest {
|
|||
"-T ProduceBeagleInput --variant:VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_HSQ_chr22_14-16m.vcf "+
|
||||
"--validation:VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_OMNI_chr22_14-16m.vcf "+
|
||||
"-L 22:14000000-16000000 -o %s -bvcf %s -bs 0.8 -valp 0.98 -R /humgen/1kg/reference/human_g1k_v37.fasta --no_cmdline_in_header ",2,
|
||||
Arrays.asList("660986891b30cdc937e0f2a3a5743faa","e96ddd51da9f4a797b2aa8c20e404166"));
|
||||
Arrays.asList("660986891b30cdc937e0f2a3a5743faa","4b6417f892ccfe5c63b8a60cb0ef3740"));
|
||||
executeTest("test BeagleInputWithBootstrap",spec);
|
||||
}
|
||||
|
||||
|
|
@ -72,7 +72,7 @@ public class BeagleIntegrationTest extends WalkerTest {
|
|||
"--beagleR2:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.r2 "+
|
||||
"--beagleProbs:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.gprobs.bgl "+
|
||||
"--beaglePhased:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.phased.bgl "+
|
||||
"-L 20:1-70000 -o %s --no_cmdline_in_header ",1,Arrays.asList("43865f3f0d975ee2c5912b31393842f8"));
|
||||
"-L 20:1-70000 -o %s --no_cmdline_in_header ",1,Arrays.asList("8c05bda0630155bcd0ebaf155ed5e491"));
|
||||
|
||||
executeTest("testBeagleChangesSitesToRef",spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,12 +35,14 @@ public class DiffObjectsIntegrationTest extends WalkerTest {
|
|||
private class TestParams extends TestDataProvider {
|
||||
public File master, test;
|
||||
public String MD5;
|
||||
public boolean doPairwise;
|
||||
|
||||
private TestParams(String master, String test, String MD5) {
|
||||
private TestParams(String master, String test, final boolean doPairwise, String MD5) {
|
||||
super(TestParams.class);
|
||||
this.master = new File(master);
|
||||
this.test = new File(test);
|
||||
this.MD5 = MD5;
|
||||
this.doPairwise = doPairwise;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
|
|
@ -50,8 +52,10 @@ public class DiffObjectsIntegrationTest extends WalkerTest {
|
|||
|
||||
@DataProvider(name = "data")
|
||||
public Object[][] createData() {
|
||||
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "daead9bfab1a5df72c5e3a239366118e");
|
||||
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "3f46f5a964f7c34015d972256fe49a35");
|
||||
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", true, "bf7ef17436a7eccf27be41a9477904f6");
|
||||
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", true, "3f46f5a964f7c34015d972256fe49a35");
|
||||
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", false, "8ab29169cff232e670db9a4c54fc4358");
|
||||
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", false, "47bf16c27c9e2c657a7e1d13f20880c9");
|
||||
return TestParams.getTests(TestParams.class);
|
||||
}
|
||||
|
||||
|
|
@ -61,6 +65,7 @@ public class DiffObjectsIntegrationTest extends WalkerTest {
|
|||
"-T DiffObjects -R public/testdata/exampleFASTA.fasta "
|
||||
+ " -m " + params.master
|
||||
+ " -t " + params.test
|
||||
+ (params.doPairwise ? " -doPairwise " : "")
|
||||
+ " -o %s",
|
||||
Arrays.asList(params.MD5));
|
||||
executeTest("testDiffObjects:"+params, spec).getFirst();
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testNoAction() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("5720826c2bf6cbc762e4a888ef58c3f2"));
|
||||
Arrays.asList("dfa5dff09fa964b06da19c0f4aff6928"));
|
||||
executeTest("test no action", spec);
|
||||
}
|
||||
|
||||
|
|
@ -24,7 +24,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testClusteredSnps() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -window 10 --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("d7c2a4b0c1b2b982847508997ba57ebf"));
|
||||
Arrays.asList("4a4596929f9fe983d8868ca142567781"));
|
||||
executeTest("test clustered SNPs", spec);
|
||||
}
|
||||
|
||||
|
|
@ -32,7 +32,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testMask1() {
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||
baseTestString() + " -maskName foo --mask:VCF3 " + testDir + "vcfexample2.vcf --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("890774962576c407d8a17ed57cf704c1"));
|
||||
Arrays.asList("1719462cd17986c33e59e45b69df0270"));
|
||||
executeTest("test mask all", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -40,7 +40,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testMask2() {
|
||||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||
baseTestString() + " -maskName foo --mask:VCF " + testDir + "vcfMask.vcf --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("8864573dbf52908501140e6b0afcbc90"));
|
||||
Arrays.asList("db19ff7d90c82cda09fb3c3878100eb5"));
|
||||
executeTest("test mask some", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -48,7 +48,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testMask3() {
|
||||
WalkerTestSpec spec3 = new WalkerTestSpec(
|
||||
baseTestString() + " -maskName foo -maskExtend 10 --mask:VCF " + testDir + "vcfMask.vcf --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("42a1c08763f151073a49e3c7bb68028b"));
|
||||
Arrays.asList("a9e417cba21585c786d4b9930265ea31"));
|
||||
executeTest("test mask extend", spec3);
|
||||
}
|
||||
|
||||
|
|
@ -56,7 +56,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testFilter1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("ef8100c3b7c67d28571cbda771c414c2"));
|
||||
Arrays.asList("4160904b180d1f62a6bf50de6728ce00"));
|
||||
executeTest("test filter #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -64,7 +64,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testFilter2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("318ed3874fd42b7da8c59554a25a1fab"));
|
||||
Arrays.asList("df80db30c7836731ac7c8c3d4fc005b4"));
|
||||
executeTest("test filter #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -72,7 +72,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testFilterWithSeparateNames() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("9cb398e78a38a7bc5e839e28c8dae2eb"));
|
||||
Arrays.asList("71ce6c0952831cb68f575aa0173dce2b"));
|
||||
executeTest("test filter with separate names #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -80,7 +80,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testGenotypeFilters1() {
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||
baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("b38709f932b969e4267603333863269e"));
|
||||
Arrays.asList("179f7f2a90c0e6c656109aac9b775476"));
|
||||
executeTest("test genotype filter #1", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -88,7 +88,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testGenotypeFilters2() {
|
||||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||
baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("0e1457e678326e44e92ee13e84414e0f"));
|
||||
Arrays.asList("22e07c27feb9017a130dfb045c5b29b9"));
|
||||
executeTest("test genotype filter #2", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -96,7 +96,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testDeletions() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --filterExpression 'QUAL < 100' --filterName foo --variant:VCF " + testDir + "twoDeletions.vcf", 1,
|
||||
Arrays.asList("569546fd798afa0e65c5b61b440d07ac"));
|
||||
Arrays.asList("637256ee5348c1c57f1dadf581b06ed9"));
|
||||
executeTest("test deletions", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeSuite;
|
||||
|
|
@ -50,7 +51,7 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
private static Genotype createGenotype(String name, double[] gls) {
|
||||
return new Genotype(name, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), Genotype.NO_LOG10_PERROR, gls);
|
||||
return new GenotypeBuilder(name, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).PL(gls).make();
|
||||
}
|
||||
|
||||
@DataProvider(name = "getGLs")
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("bf5c76bec6e00199d441b6175b6b7c39"));
|
||||
Arrays.asList("b6c677b2375541fd2db775d0029571e6"));
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -37,7 +37,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testWithAllelesPassedIn1() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + testDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("9f56f8d62c047213c894c3f250706aea"));
|
||||
Arrays.asList("3400dfae6db8ed7e1351b1aa52341714"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -45,7 +45,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testWithAllelesPassedIn2() {
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + testDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("0a5048062cd9022b761ae87efed5957e"));
|
||||
Arrays.asList("0bb67b07ee5315d0486f3a0045a03757"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -53,7 +53,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSingleSamplePilot2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("f50a30bf9bbd4e5dcd5d7d9282b6dadf"));
|
||||
Arrays.asList("5c5bf3d2676e1a26d521f1f902f73526"));
|
||||
executeTest("test SingleSample Pilot2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -61,7 +61,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultipleSNPAlleles() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + testDir + "multiallelic.snps.bam -o %s -L " + testDir + "multiallelic.snps.intervals", 1,
|
||||
Arrays.asList("6fb6ea5f2b9da02a0fea7cb2994fb5db"));
|
||||
Arrays.asList("eb6c8b7680f40b5fdac6e451c623ab81"));
|
||||
executeTest("test Multiple SNP alleles", spec);
|
||||
}
|
||||
|
||||
|
|
@ -69,7 +69,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testBadRead() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH -I " + testDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1,
|
||||
Arrays.asList("95158fb50db5d41a678cd331a3ffe5e1"));
|
||||
Arrays.asList("e2cf97bca4a720ca64ca7f682da6c9f0"));
|
||||
executeTest("test bad read", spec);
|
||||
}
|
||||
|
||||
|
|
@ -77,7 +77,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testReverseTrim() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
|
||||
Arrays.asList("c86e05f315a86bc190d72cde911e6fe2"));
|
||||
Arrays.asList("0c195201574815559757885c693b6640"));
|
||||
executeTest("test reverse trim", spec);
|
||||
}
|
||||
|
||||
|
|
@ -87,7 +87,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "f6d655714706b6e8390037db3fad60ef";
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "6209a19a33ac9e187a9074cee549f93b";
|
||||
|
||||
@Test
|
||||
public void testCompressedOutput() {
|
||||
|
|
@ -108,7 +108,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
|
||||
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
|
||||
|
||||
String md5 = "7bc812cc553b4ab77c08049f0e32d0f6";
|
||||
String md5 = "34cb7146c037925e8f324cffd986834d";
|
||||
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
|
||||
|
|
@ -140,7 +140,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinBaseQualityScore() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1,
|
||||
Arrays.asList("dfeaccb68165fdaffafde9150914432d"));
|
||||
Arrays.asList("f48e4898c741c84354da3a0562cb44e1"));
|
||||
executeTest("test min_base_quality_score 26", spec);
|
||||
}
|
||||
|
||||
|
|
@ -148,7 +148,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSLOD() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("35ef19b4f248969c74da8bd7489385d6"));
|
||||
Arrays.asList("f4ef85f1ed72e35b91b0469edf5956ad"));
|
||||
executeTest("test SLOD", spec);
|
||||
}
|
||||
|
||||
|
|
@ -156,7 +156,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testNDA() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("aa49989fde8c6378f5c751f8b267c471"));
|
||||
Arrays.asList("ea219bdce9596e8649ad1d39e24e333a"));
|
||||
executeTest("test NDA", spec);
|
||||
}
|
||||
|
||||
|
|
@ -164,23 +164,23 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testCompTrack() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("ffaeb60a5776d85b41c64786ddc4d14d"));
|
||||
Arrays.asList("9d5c51379e1b1031da5735aa8c965766"));
|
||||
executeTest("test using comp track", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameterSitesOnly() {
|
||||
testOutputParameters("-sites_only", "f9a4005c53291170800e6023503d5635");
|
||||
testOutputParameters("-sites_only", "ac8bea16be247d9e39d66a6305409f57");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameterAllConfident() {
|
||||
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "e6c63baff51aaeb318c8bebaf2989828");
|
||||
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "1e908a3164adbab10dcb6415e2645954");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameterAllSites() {
|
||||
testOutputParameters("--output_mode EMIT_ALL_SITES", "43ffa34646d781a368ea81342c21ae2e");
|
||||
testOutputParameters("--output_mode EMIT_ALL_SITES", "eee23523912b51b249472e6d5fc0aece");
|
||||
}
|
||||
|
||||
private void testOutputParameters(final String args, final String md5) {
|
||||
|
|
@ -194,7 +194,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testConfidence() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
|
||||
Arrays.asList("c7cb29121eb30e752ab6652a6d2a62a6"));
|
||||
Arrays.asList("355bee3d375e994e4a3b07f7a8d267a0"));
|
||||
executeTest("test confidence 1", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -202,7 +202,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testConfidence2() {
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
|
||||
Arrays.asList("e7bdb76be82420a03ff28038d283822d"));
|
||||
Arrays.asList("72d9ea93591b17535b7f5b53e1d064cb"));
|
||||
executeTest("test confidence 2", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -213,12 +213,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Test
|
||||
public void testHeterozyosity1() {
|
||||
testHeterozosity( 0.01, "ca65e199e9ff0bc986df3dee74e11eb1" );
|
||||
testHeterozosity( 0.01, "0ffd19f90b05652e45f58e4a959ae304" );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHeterozyosity2() {
|
||||
testHeterozosity( 1.0 / 1850, "ddcdfe4a5252da59278a6f1ba6f8a175" );
|
||||
testHeterozosity( 1.0 / 1850, "b6dbfb567e433273fe90b0d038556a9f" );
|
||||
}
|
||||
|
||||
private void testHeterozosity(final double arg, final String md5) {
|
||||
|
|
@ -242,7 +242,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("c4b3876d76e3d0fb78a1d3ebd674f1a1"));
|
||||
Arrays.asList("c9675bc1ca6c82cb60d39d9395881c96"));
|
||||
|
||||
executeTest(String.format("test multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -261,7 +261,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -L 1:10,000,000-10,100,000" +
|
||||
" -baq CALCULATE_AS_NECESSARY",
|
||||
1,
|
||||
Arrays.asList("41445b1cd1a82af71126ff1692f7a5fe"));
|
||||
Arrays.asList("6e4089986d08d46a8d0b4ddfd611a7c3"));
|
||||
|
||||
executeTest(String.format("test calling with BAQ"), spec);
|
||||
}
|
||||
|
|
@ -280,7 +280,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("c9e79470a4ce6eacde366e9fcf4d5b14"));
|
||||
Arrays.asList("80a5a499cc553ee579ba93dcb967e5ef"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX"), spec);
|
||||
}
|
||||
|
|
@ -295,7 +295,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -minIndelCnt 1" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("70f8a17ba68131520db5c764ac5acdd2"));
|
||||
Arrays.asList("9271105e630ab39cf1c88b338da54594"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
|
||||
}
|
||||
|
|
@ -308,7 +308,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("e4316d80fd833886820c8b4e122fbfc4"));
|
||||
Arrays.asList("d77a379429ca848cea552c4697b86472"));
|
||||
|
||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -318,7 +318,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + testDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("c92aba3635f3331ddf8ae7a0382ca594"));
|
||||
Arrays.asList("f83c4f370ed0a343ca0808e5da3d997d"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
|
||||
}
|
||||
|
||||
|
|
@ -328,7 +328,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||
+ testDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("b87034f349887160ec1124e12863d543"));
|
||||
Arrays.asList("ca5459e93a9955aec8f93abf7f84e5ed"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
|
||||
}
|
||||
|
||||
|
|
@ -336,13 +336,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSampleIndels1() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||
Arrays.asList("51e6a7868d2ea2daefa411ed82f18be2"));
|
||||
Arrays.asList("04aaeff1e9f97bbf2dc2d6d754f25a0d"));
|
||||
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
|
||||
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||
Arrays.asList("954c52be0c6ca9ed5a213a53f4efbc10"));
|
||||
Arrays.asList("5c7db047ae9417d37c6bbda1d8ea6019"));
|
||||
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -352,7 +352,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + testDir + vcf + " -I " + validationDataLocation +
|
||||
"NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam -o %s -L " + validationDataLocation + vcf, 1,
|
||||
Arrays.asList("ae44230ed54fd8ce63711cae908470cb"));
|
||||
Arrays.asList("3e3ac23846801c34acbf10a1a527264a"));
|
||||
executeTest("test GENOTYPE_GIVEN_ALLELES with no evidence in reads", spec);
|
||||
}
|
||||
|
||||
|
|
@ -385,7 +385,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction0() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
|
||||
Arrays.asList("471012c1d3dbec4633710264de5daa24"));
|
||||
Arrays.asList("90e8140f114e026f2a0e7a881baa3f20"));
|
||||
executeTest("test minIndelFraction 0.0", spec);
|
||||
}
|
||||
|
||||
|
|
@ -393,7 +393,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction25() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
|
||||
Arrays.asList("9165507fb202d515512a947a8a9db6bb"));
|
||||
Arrays.asList("db70b7a015fa882c8ce1e4c43f589f22"));
|
||||
executeTest("test minIndelFraction 0.25", spec);
|
||||
}
|
||||
|
||||
|
|
@ -401,7 +401,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction100() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 1", 1,
|
||||
Arrays.asList("c1bbd4998b7c6dffee1682d3e5c929cc"));
|
||||
Arrays.asList("50a6774b7d8f71fe0e125c204d50ba84"));
|
||||
executeTest("test minIndelFraction 1.0", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
2,
|
||||
Arrays.asList("d54a142d68dca54e478c13f9a0e4c95c","1a37fcc93a73429f9065b942ab771233")
|
||||
Arrays.asList("cd112ec37a9e28d366aff29a85fdcaa0","313cc749c7ee97713e4551de39e01ac5")
|
||||
);
|
||||
executeTest("testTrueNegativeMV", spec);
|
||||
}
|
||||
|
|
@ -48,7 +48,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
2,
|
||||
Arrays.asList("883ea7fd2b200c4b7fa95a4f7aa15931","7b1f5309c3d4f4aa7e9061f288dceb68")
|
||||
Arrays.asList("27ccd6feb51de7e7dcdf35f4697fa4eb","dd90dad9fd11e1b16e6660c3ca0553e7")
|
||||
);
|
||||
executeTest("testTruePositiveMV", spec);
|
||||
}
|
||||
|
|
@ -67,7 +67,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
2,
|
||||
Arrays.asList("e812d62a3449b74b6948ee7deb8a0790","d00922496759e84c66a4b5e222e36997")
|
||||
Arrays.asList("719d681bb0a52a40bc854bba107c5c94","b35a86d2cad17f0db7b5e84ddc0e5545")
|
||||
);
|
||||
executeTest("testFalsePositiveMV", spec);
|
||||
}
|
||||
|
|
@ -86,7 +86,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
2,
|
||||
Arrays.asList("e3c572f933a40e1878a2cfa52049517a","0de6cccfec929caa07cd0eeafacbfffd")
|
||||
Arrays.asList("7f4a277aee2c7398fcfa84d6c98d5fb3","c53b5fd377bef48e9c6035a94db398db")
|
||||
);
|
||||
executeTest("testSpecialCases", spec);
|
||||
}
|
||||
|
|
@ -108,7 +108,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
2,
|
||||
Arrays.asList("b42af3b73a2cb38cfc92f8047dd686b3","a69c3f9c005e852b44c29ab25e87ba0d")
|
||||
Arrays.asList("44e09d2f9e4d8a9488226d03a97fe999","6f596470740e1a57679bbb38c0126364")
|
||||
);
|
||||
executeTest("testPriorOption", spec);
|
||||
}
|
||||
|
|
@ -128,7 +128,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("d00922496759e84c66a4b5e222e36997")
|
||||
Arrays.asList("b35a86d2cad17f0db7b5e84ddc0e5545")
|
||||
);
|
||||
executeTest("testMVFileOption", spec);
|
||||
}
|
||||
|
|
@ -149,7 +149,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
|
|||
"-fatherAlleleFirst"
|
||||
),
|
||||
2,
|
||||
Arrays.asList("c158a3816357597543ef85c4478c41e8","4f8daca19c8f31bd87850c124f91e330")
|
||||
Arrays.asList("60ced3d078792a150a03640b62926857","6d550784382aa910f78b533d889c91c0")
|
||||
);
|
||||
executeTest("testFatherAlleleFirst", spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
|
||||
+ " -L chr20:332341-382503",
|
||||
1,
|
||||
Arrays.asList("2520f93505fda28d44f618a0123d593b"));
|
||||
Arrays.asList("0a41b96b04a87fdb99bc3342d48d2eba"));
|
||||
executeTest("MAX 10 het sites [TEST ONE]; require PQ >= 10", spec);
|
||||
}
|
||||
|
||||
|
|
@ -36,7 +36,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
|
||||
+ " -L chr20:1232503-1332503",
|
||||
1,
|
||||
Arrays.asList("965b8f448365b7f4a124d32e809eb048"));
|
||||
Arrays.asList("f7517896c899a872c24d8e823ac9deae"));
|
||||
executeTest("MAX 10 het sites [TEST TWO]; require PQ >= 10", spec);
|
||||
}
|
||||
|
||||
|
|
@ -46,7 +46,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 2, 30)
|
||||
+ " -L chr20:332341-382503",
|
||||
1,
|
||||
Arrays.asList("60f5bb699335f47cdc505322c5be3803"));
|
||||
Arrays.asList("cdbdd2f68c232012b6fe9a322b0ea24c"));
|
||||
executeTest("MAX 2 het sites [TEST THREE]; require PQ >= 30", spec);
|
||||
}
|
||||
|
||||
|
|
@ -56,7 +56,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 5, 100)
|
||||
+ " -L chr20:332341-382503",
|
||||
1,
|
||||
Arrays.asList("023c2fb43b50807cfd46841ed6f0d215"));
|
||||
Arrays.asList("6b70e3e4e28f9583d35d98bf8a7d0d59"));
|
||||
executeTest("MAX 5 het sites [TEST FOUR]; require PQ >= 100", spec);
|
||||
}
|
||||
|
||||
|
|
@ -66,7 +66,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 1000, 7, 10)
|
||||
+ " -L chr20:332341-482503",
|
||||
1,
|
||||
Arrays.asList("e5e6e9f84d108d5b001aa53017d2801e"));
|
||||
Arrays.asList("6163a1fba27532da77765a7a11c55332"));
|
||||
executeTest("MAX 7 het sites [TEST FIVE]; require PQ >= 10; cacheWindow = 1000", spec);
|
||||
}
|
||||
|
||||
|
|
@ -76,7 +76,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
|
||||
+ " -L chr20:652810-681757",
|
||||
1,
|
||||
Arrays.asList("8fc53bfbea2754ff8577460786a3400c"));
|
||||
Arrays.asList("61a7d05f9eb4317cf0e6937d72e1e7ec"));
|
||||
executeTest("MAX 10 het sites [TEST SIX]; require PQ >= 10; cacheWindow = 20000; has inconsistent sites", spec);
|
||||
}
|
||||
|
||||
|
|
@ -86,7 +86,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "CEU.trio.2010_03.genotypes.hg18.vcf", 20000, 10, 10)
|
||||
+ " -L chr20:332341-802503",
|
||||
1,
|
||||
Arrays.asList("c37548b333b65f58d0edfc5c2a62a28a"));
|
||||
Arrays.asList("44eb225ab3167651ec0a9e1fdcc83d34"));
|
||||
executeTest("Use trio-phased VCF, but ignore its phasing [TEST SEVEN]", spec);
|
||||
}
|
||||
|
||||
|
|
@ -96,7 +96,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "CEU.trio.2010_03.genotypes.hg18.vcf", 20000, 10, 10)
|
||||
+ " -L chr20:332341-802503" + " -respectPhaseInInput",
|
||||
1,
|
||||
Arrays.asList("dfc7cdddd702e63d46d04f61a3ecd720"));
|
||||
Arrays.asList("e3549b89d49092e73cc6eb21f233471c"));
|
||||
executeTest("Use trio-phased VCF, and respect its phasing [TEST EIGHT]", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ public class ValidationSiteSelectorIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(sampleNone + freqUnif + "--variant " + testfile),
|
||||
1,
|
||||
Arrays.asList("6a9e990a9252840904b5144213915b32")
|
||||
Arrays.asList("b8a988757ac1f206d123140da5a3e778")
|
||||
);
|
||||
|
||||
executeTest("testNoSampleSelectionFreqUniform--" + testfile, spec);
|
||||
|
|
@ -45,7 +45,7 @@ public class ValidationSiteSelectorIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(sampleNone + freqAF + "--variant " + testfile),
|
||||
1,
|
||||
Arrays.asList("eaa2385086cddff68cf4fdb81cbdbbb9")
|
||||
Arrays.asList("542d5d5ff8c64da7b077bab4b950a9a3")
|
||||
);
|
||||
|
||||
executeTest("testNoSampleSelectionFreqAF--" + testfile, spec);
|
||||
|
|
@ -57,7 +57,7 @@ public class ValidationSiteSelectorIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(sampleGT + freqUnif + "--variant " + testfile),
|
||||
1,
|
||||
Arrays.asList("24077656f590d6905546f7e019c8dccb")
|
||||
Arrays.asList("7385b17eed7f4ff0f6e82e60c3334ce7")
|
||||
);
|
||||
|
||||
executeTest("testPolyGTFreqUniform--" + testfile, spec);
|
||||
|
|
@ -69,7 +69,7 @@ public class ValidationSiteSelectorIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(sampleGT + freqAF + "--variant " + testfile),
|
||||
1,
|
||||
Arrays.asList("3c1180fd9b5e80e540b39c5a95fbe722")
|
||||
Arrays.asList("0ee4a565a0d4f6b6942abd72a373becd")
|
||||
);
|
||||
|
||||
executeTest("testPolyGTFreqAF--" + testfile, spec);
|
||||
|
|
@ -81,7 +81,7 @@ public class ValidationSiteSelectorIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(sampleGL + freqAF + "--variant " + testfile),
|
||||
1,
|
||||
Arrays.asList("ad30c028864348204ebe80b9c8c503e8")
|
||||
Arrays.asList("0ee4a565a0d4f6b6942abd72a373becd")
|
||||
);
|
||||
|
||||
executeTest("testPolyGLFreqAF--" + testfile, spec);
|
||||
|
|
|
|||
|
|
@ -302,7 +302,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
String tests = cmdRoot +
|
||||
" --dbsnp " + b36dbSNP129 +
|
||||
" --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" +
|
||||
" --comp:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf";
|
||||
" --comp:comp_genotypes " + testDir + "yri.trio.gatk.ug.head.vcf";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
|
||||
1, Arrays.asList("4b9dcbce0717285e3c0c736c1bed744c"));
|
||||
executeTestParallel("testSelect1", spec);
|
||||
|
|
|
|||
|
|
@ -27,8 +27,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
|
||||
VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf",
|
||||
"0ddd1e0e483d2eaf56004615cea23ec7", // tranches
|
||||
"a45a78de049cfe767ce23d3423f80b01", // recal file
|
||||
"1050c387d170639f8cec221e5dddd626"); // cut VCF
|
||||
"6e1f98bb819ccf03e17a2288742160d3", // recal file
|
||||
"c58ff4140e8914f0b656ed625c7f73b9"); // cut VCF
|
||||
|
||||
@DataProvider(name = "VRTest")
|
||||
public Object[][] createData1() {
|
||||
|
|
@ -67,16 +67,16 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
" --no_cmdline_in_header" +
|
||||
" -input " + params.inVCF +
|
||||
" -o %s" +
|
||||
" -tranchesFile " + MD5DB.getMD5FilePath(params.tranchesMD5, null) +
|
||||
" -recalFile " + MD5DB.getMD5FilePath(params.recalMD5, null),
|
||||
" -tranchesFile " + getMd5DB().getMD5FilePath(params.tranchesMD5, null) +
|
||||
" -recalFile " + getMd5DB().getMD5FilePath(params.recalMD5, null),
|
||||
Arrays.asList(params.cutVCFMD5));
|
||||
executeTest("testApplyRecalibration-"+params.inVCF, spec);
|
||||
}
|
||||
|
||||
VRTest indel = new VRTest("combined.phase1.chr20.raw.indels.sites.vcf",
|
||||
"da4458d05f6396f5c4ab96f274e5ccdc", // tranches
|
||||
"918a5ecad5a2a8a46795144366683188", // recal file
|
||||
"bf0e8ed5e250d52f0545074c61217d16"); // cut VCF
|
||||
"8e2417336fa62e6c4d9f61b6deebdd82", // recal file
|
||||
"05e88052e0798f1c1e83f0a8938bce56"); // cut VCF
|
||||
|
||||
@DataProvider(name = "VRIndelTest")
|
||||
public Object[][] createData2() {
|
||||
|
|
@ -115,8 +115,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
" --no_cmdline_in_header" +
|
||||
" -input " + params.inVCF +
|
||||
" -o %s" +
|
||||
" -tranchesFile " + MD5DB.getMD5FilePath(params.tranchesMD5, null) +
|
||||
" -recalFile " + MD5DB.getMD5FilePath(params.recalMD5, null),
|
||||
" -tranchesFile " + getMd5DB().getMD5FilePath(params.tranchesMD5, null) +
|
||||
" -recalFile " + getMd5DB().getMD5FilePath(params.recalMD5, null),
|
||||
Arrays.asList(params.cutVCFMD5));
|
||||
executeTest("testApplyRecalibrationIndel-"+params.inVCF, spec);
|
||||
}
|
||||
|
|
@ -133,7 +133,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -tranchesFile " + testDir + "VQSR.mixedTest.tranches" +
|
||||
" -recalFile " + testDir + "VQSR.mixedTest.recal",
|
||||
Arrays.asList("9039576b63728df7ee2c881817c0e9eb"));
|
||||
Arrays.asList("1370d7701a6231633d43a8062b7aff7f"));
|
||||
executeTest("testApplyRecalibrationSnpAndIndelTogether", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -78,26 +78,26 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
executeTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
|
||||
}
|
||||
|
||||
@Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "c1e82f0842ca721d10f21604f26a5248"); }
|
||||
@Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "b2fcf3983cc9e667b9bbed8372080776", " -setKey foo"); }
|
||||
@Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "98c0cbb94e5debf7545a656665a1b659", " -setKey null"); }
|
||||
@Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "10170f9e72cc831a5820bd03e70fe46a"); } // official project VCF files in tabix format
|
||||
@Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "6469fce8a5cd5a0f77e5ac5d9e9e192b"); }
|
||||
@Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "a4cedaa83d54e34cafc3ac4b80acf5b4", " -setKey foo"); }
|
||||
@Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "ac58a5fde17661e2a19004ca954d9781", " -setKey null"); }
|
||||
@Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "67a8076e30b4bca0ea5acdc9cd26a4e0"); } // official project VCF files in tabix format
|
||||
|
||||
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "074e909f80ffcc9fddc3fac89ea36bef"); }
|
||||
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "f26980af214011c0452b8ce843f3063b"); }
|
||||
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "ef2d249ea4b25311966e038aac05c661"); }
|
||||
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "cdb448aaa92ca5a9e393d875b42581b3"); }
|
||||
|
||||
@Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "5bc1de1197506aced0f9e7a08b572c44"); }
|
||||
@Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "284083f60792c5f817899445dfa63a42"); }
|
||||
|
||||
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "06e86711bcf0efc0f0c4a378f6147cf6"); } // official project VCF files in tabix format
|
||||
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "03103f6b39e9fb7a396df0013f01fae6"); } // official project VCF files in tabix format
|
||||
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "12fc1b8145f7884762f0c2cbbd319ae1"); }
|
||||
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "4efdf983918db822e4ac13d911509576"); } // official project VCF files in tabix format
|
||||
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "848d4408ee953053d2307cefebc6bd6d"); } // official project VCF files in tabix format
|
||||
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "91f6087e6e2bf3df4d1c9700eaff958b"); }
|
||||
|
||||
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "7e2dba80ba38b2a86713f635d630eb59"); }
|
||||
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "a9be239ab5e03e7e97caef58a3841dd2"); }
|
||||
|
||||
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "3950392e1b8b53ae363e705185ad1da9"); }
|
||||
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "0b1815c699e71e143ed129bfadaffbcb"); }
|
||||
|
||||
@Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "5c60eb8d5d4b957a0cf52ca008f021ba"); }
|
||||
@Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "774b43e69cc7ec93090b4f6e9f4a1079"); }
|
||||
@Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "def52bcd3942bbe39cd7ebe845c4f206"); }
|
||||
@Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "5f61145949180bf2a0cd342d8e064860"); }
|
||||
|
||||
@Test public void threeWayWithRefs() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
|
|
@ -110,7 +110,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
|
||||
" -genotypeMergeOptions UNIQUIFY -L 1"),
|
||||
1,
|
||||
Arrays.asList("948291bbf47d1cec692d0fe4358ff92c"));
|
||||
Arrays.asList("c0d4d601aa5d2b29927c535868448d2a"));
|
||||
executeTest("threeWayWithRefs", spec);
|
||||
}
|
||||
|
||||
|
|
@ -127,17 +127,17 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
executeTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
|
||||
}
|
||||
|
||||
@Test public void complexTestFull() { combineComplexSites("", "dd805f6edfc3cf724512dfbbe8df5183"); }
|
||||
@Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "14a205edb022f79abf1863588cfee56b"); }
|
||||
@Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "e118d04d1d47c02ad38c046561a9f616"); }
|
||||
@Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "e118d04d1d47c02ad38c046561a9f616"); }
|
||||
@Test public void complexTestFull() { combineComplexSites("", "7d587bf49bbc9f8239476bab84bf9708"); }
|
||||
@Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "4d1e0c12d95f50e472493fc14af3cc06"); }
|
||||
@Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "9a98b01b9b2a28ae6af3125edc131dea"); }
|
||||
@Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "9a98b01b9b2a28ae6af3125edc131dea"); }
|
||||
|
||||
@Test
|
||||
public void combineDBSNPDuplicateSites() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T CombineVariants --no_cmdline_in_header -L 1:902000-903000 -o %s -R " + b37KGReference + " -V:v1 " + b37dbSNP132,
|
||||
1,
|
||||
Arrays.asList("a838dc241cf357466cd4331fd298c73a"));
|
||||
Arrays.asList("3d2a5a43db86e3f6217ed2a63251285b"));
|
||||
executeTest("combineDBSNPDuplicateSites:", spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -6,7 +6,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
|||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderUnitTest;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderUnitTest;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
||||
|
||||
import org.testng.annotations.Test;
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ public class LeftAlignVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T LeftAlignVariants -o %s -R " + b37KGReference + " --variant:vcf " + testDir + "forLeftAlignVariantsTest.vcf --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("8e0991576518823b339a4e2f83299d4f"));
|
||||
Arrays.asList("bcf05f56adbb32a47b6d6b27b327d5c2"));
|
||||
executeTest("test left alignment", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testb36Tohg19() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T LiftoverVariants -o %s -R " + b36KGReference + " --variant:vcf3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
|
||||
"-T LiftoverVariants -o %s -R " + b36KGReference + " --variant " + testDir + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
|
||||
1,
|
||||
Arrays.asList("70aeaca5b74cc7ba8e2da7b71ff0fbfd"));
|
||||
executeTest("test b36 to hg19", spec);
|
||||
|
|
@ -47,7 +47,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testb36Tohg19UnsortedSamples() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T LiftoverVariants -o %s -R " + b36KGReference + " --variant:vcf3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
|
||||
"-T LiftoverVariants -o %s -R " + b36KGReference + " --variant " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
|
||||
1,
|
||||
Arrays.asList("07d1bf52125d1f9a25e260e13ec7b010"));
|
||||
executeTest("test b36 to hg19, unsorted samples", spec);
|
||||
|
|
|
|||
|
|
@ -10,6 +10,47 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
return "-T SelectVariants -R " + b36KGReference + " -L 1 -o %s --no_cmdline_in_header" + args;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDiscordanceNoSampleSpecified() {
|
||||
String testFile = testDir + "NA12878.hg19.example1.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + hg19Reference + " -L 20:1012700-1020000 --variant " + b37hapmapGenotypes + " -disc " + testFile + " -o %s --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("133fd0ded0bb213097cbe68995afbb7e")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
|
||||
executeTest("testDiscordanceNoSampleSpecified--" + testFile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRepeatedLineSelection() {
|
||||
String testfile = testDir + "test.dup.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -sn A -sn B -sn C --variant " + testfile),
|
||||
1,
|
||||
Arrays.asList("b2ee12588ebda200727762a903b8c972")
|
||||
);
|
||||
|
||||
executeTest("testRepeatedLineSelection--" + testfile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDiscordance() {
|
||||
String testFile = testDir + "NA12878.hg19.example1.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variant " + b37hapmapGenotypes + " -disc " + testFile + " -o %s --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("f64c90c4cca470f1095d9fa2062eac3e")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
|
||||
executeTest("testDiscordance--" + testFile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testComplexSelection() {
|
||||
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||
|
|
@ -18,7 +59,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile),
|
||||
1,
|
||||
Arrays.asList("6cd82274335eeb0b449e571f38d54d3a")
|
||||
Arrays.asList("446eea62630bc5325ffab30b9b9fbfe4")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testComplexSelection--" + testfile, spec);
|
||||
|
|
@ -32,53 +73,13 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s --no_cmdline_in_header -xl_sn A -xl_sf " + samplesFile + " --variant " + testfile,
|
||||
1,
|
||||
Arrays.asList("bbd7b28d1c5701e17b395d64f8b6f13d")
|
||||
Arrays.asList("b24f31db48d254d8fe15295955173486")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
|
||||
executeTest("testSampleExclusion--" + testfile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRepeatedLineSelection() {
|
||||
String testfile = testDir + "test.dup.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -sn A -sn B -sn C --variant " + testfile),
|
||||
1,
|
||||
Arrays.asList("77579c53dbde4e8171f3cee83b98351b")
|
||||
);
|
||||
|
||||
executeTest("testRepeatedLineSelection--" + testfile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDiscordance() {
|
||||
String testFile = testDir + "NA12878.hg19.example1.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variant " + b37hapmapGenotypes + " -disc " + testFile + " -o %s --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("03abdc27bfd7aa36d57bba0325b31e0d")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
|
||||
executeTest("testDiscordance--" + testFile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDiscordanceNoSampleSpecified() {
|
||||
String testFile = testDir + "NA12878.hg19.example1.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + hg19Reference + " -L 20:1012700-1020000 --variant " + b37hapmapGenotypes + " -disc " + testFile + " -o %s --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("9fb54ed003234a5847c565ffb6767b95")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
|
||||
executeTest("testDiscordanceNoSampleSpecified--" + testFile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testConcordance() {
|
||||
|
|
@ -87,7 +88,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc " + b37hapmapGenotypes + " --variant " + testFile + " -o %s --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("76857b016198c3e08a2e27bbdb49f3f0")
|
||||
Arrays.asList("9da5dab3d344c1c0a5987b15e60fa082")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
|
||||
|
|
@ -101,7 +102,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b36KGReference + " -restrictAllelesTo MULTIALLELIC -selectType MIXED --variant " + testFile + " -o %s --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("6c0b0c5f03d26f4a7a1438a2afc9fb6b")
|
||||
Arrays.asList("30b89b3a6706f7f46b23bfb3be69cc8e")
|
||||
);
|
||||
|
||||
executeTest("testVariantTypeSelection--" + testFile, spec);
|
||||
|
|
@ -114,7 +115,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant:dbsnp " + testFile + " -o %s --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("a8a26c621018142c9cba1080cbe687a8")
|
||||
Arrays.asList("8bf557aaa07eccb294c81f491225bf9e")
|
||||
);
|
||||
|
||||
executeTest("testUsingDbsnpName--" + testFile, spec);
|
||||
|
|
@ -127,7 +128,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b36KGReference + " -regenotype -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("6bee6dc2316aa539560a6d9d8adbc4ff")
|
||||
Arrays.asList("5bf9663274ceb552f5469f8c1dfc22ed")
|
||||
);
|
||||
|
||||
executeTest("testRegenotype--" + testFile, spec);
|
||||
|
|
@ -140,10 +141,10 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b36KGReference + " -select 'KG_FREQ < 0.5' --variant " + testFile + " -o %s --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("6ff686a64e98fc1be2cde9b034d4a43a")
|
||||
Arrays.asList("cb9932f9a7aa2e53af605b30d88ad43f")
|
||||
);
|
||||
|
||||
executeTest("testMultipleRecordsAtOnePositionFirstIsFiltered--" + testFile, spec);
|
||||
executeTest("testMultipleRecordsAtOnePosition--" + testFile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -153,13 +154,13 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b37KGReference + " --variant " + testFile + " -o %s --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("95c4d43b11c3d0dd3ab19941c474269b")
|
||||
Arrays.asList("920605cc2182026e3f54c009f6a04141")
|
||||
);
|
||||
|
||||
executeTest("testMultipleRecordsAtOnePositionFirstIsFiltered--" + testFile, spec);
|
||||
executeTest("testNoGTs--" + testFile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(enabled = false)
|
||||
public void testParallelization2() {
|
||||
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||
|
|
@ -168,13 +169,13 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
spec = new WalkerTestSpec(
|
||||
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 2"),
|
||||
1,
|
||||
Arrays.asList("6cd82274335eeb0b449e571f38d54d3a")
|
||||
Arrays.asList("446eea62630bc5325ffab30b9b9fbfe4")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testParallelization (2 threads)--" + testfile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(enabled = false)
|
||||
public void testParallelization4() {
|
||||
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||
|
|
@ -182,7 +183,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
spec = new WalkerTestSpec(
|
||||
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 4"),
|
||||
1,
|
||||
Arrays.asList("6cd82274335eeb0b449e571f38d54d3a")
|
||||
Arrays.asList("446eea62630bc5325ffab30b9b9fbfe4")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
|
||||
|
|
@ -196,7 +197,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b37KGReference + " -o %s --no_cmdline_in_header -sf " + samplesFile + " --excludeNonVariants --variant " + testfile,
|
||||
1,
|
||||
Arrays.asList("fa92b3b41f1c04f685be8de32afc9706")
|
||||
Arrays.asList("2f2a342812ba914bcce666e42ef761d7")
|
||||
);
|
||||
executeTest("test select from multi allelic with excludeNonVariants --" + testfile, spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
|
|||
|
||||
|
||||
// Copy VCF data from the test file into the FIFO.
|
||||
String testFile = validationDataLocation + "yri.trio.gatk.ug.head.vcf";
|
||||
String testFile = testDir + "yri.trio.gatk.ug.head.vcf";
|
||||
FileInputStream inputStream = new FileInputStream(testFile);
|
||||
FileOutputStream outputStream = new FileOutputStream(tmpFifo);
|
||||
outputStream.getChannel().transferFrom(inputStream.getChannel(),0,inputStream.getChannel().size());
|
||||
|
|
@ -56,11 +56,11 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants" +
|
||||
" -R " + b36KGReference +
|
||||
" --variant:vcf3,storage=STREAM " + tmpFifo.getAbsolutePath() +
|
||||
" --variant:VCF,storage=STREAM " + tmpFifo.getAbsolutePath() +
|
||||
" --no_cmdline_in_header " +
|
||||
" -o %s",
|
||||
1,
|
||||
Arrays.asList("c5e93b0e2e8610785d43e5d9e7fb5a7b")
|
||||
Arrays.asList("b532a20b5af4e8ea7a073888976c71ba")
|
||||
);
|
||||
|
||||
executeTest("testSimpleVCFStreaming", spec);
|
||||
|
|
@ -74,13 +74,13 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
|
|||
File tmpFifo = File.createTempFile("vcfstreaming","");
|
||||
Runtime.getRuntime().exec(new String[] {"mkfifo",tmpFifo.getAbsolutePath()});
|
||||
|
||||
String testFile = validationDataLocation + "yri.trio.gatk.ug.head.vcf";
|
||||
String testFile = testDir + "yri.trio.gatk.ug.head.vcf";
|
||||
|
||||
// Output select to FIFO
|
||||
WalkerTestSpec selectTestSpec = new WalkerTestSpec(
|
||||
"-T SelectVariants" +
|
||||
" -R " + b36KGReference +
|
||||
" --variant:vcf3,storage=STREAM " + testFile +
|
||||
" --variant:VCF,storage=STREAM " + testFile +
|
||||
" --no_cmdline_in_header" +
|
||||
" -select 'QD > 2.0'" +
|
||||
" -o " + tmpFifo.getAbsolutePath(),
|
||||
|
|
@ -93,7 +93,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
|
|||
selectTestSpec = new WalkerTestSpec(
|
||||
"-T VariantEval" +
|
||||
" -R " + b36KGReference +
|
||||
" --eval:vcf3 " + testFile +
|
||||
" --eval " + testFile +
|
||||
" --comp:vcf,storage=STREAM " + tmpFifo.getAbsolutePath() +
|
||||
" -EV CompOverlap -noEV -noST" +
|
||||
" -o %s",
|
||||
|
|
|
|||
|
|
@ -83,10 +83,23 @@ public class VariantsToTableIntegrationTest extends WalkerTest {
|
|||
" -GF RD" +
|
||||
" -o %s",
|
||||
1,
|
||||
Arrays.asList("f80c4714d83226b6a6db8bf281b3bcba"));
|
||||
Arrays.asList("d43562e9b94f0e8e337d38a6829671ee"));
|
||||
executeTest("testGenotypeFields", spec);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testGenotypeFieldsWithInline() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
" --variant " + testDir + "vcfexample2.vcf" +
|
||||
" -T VariantsToTable" +
|
||||
" -GF RD -GF GT -GF GQ" +
|
||||
" -o %s",
|
||||
1,
|
||||
Arrays.asList("29744059742ae71fd6aabd29e5c391fb"));
|
||||
executeTest("testGenotypeFieldsWithInline", spec);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMoltenOutput() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -111,7 +124,7 @@ public class VariantsToTableIntegrationTest extends WalkerTest {
|
|||
" --moltenize" +
|
||||
" -o %s",
|
||||
1,
|
||||
Arrays.asList("132890fd33d16946e04b41cfd7453c0e"));
|
||||
Arrays.asList("1d97fe63c249a995df4ce666382872d8"));
|
||||
executeTest("testMoltenOutputWithGenotypeFields", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testVariantsToVCFUsingDbsnpInput() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("a26afcce2a89f905a49c3d09719586b2");
|
||||
md5.add("268c116f825c2a4b5200a416ca587adc");
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
|
|
@ -36,7 +36,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testVariantsToVCFUsingGeliInput() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("4accae035d271b35ee2ec58f403c68c6");
|
||||
md5.add("82ca5ecef2df5d64dee9ef5a4b14ef2f");
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
|
|
@ -54,7 +54,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testGenotypesToVCFUsingGeliInput() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("2413f036ec4100b8d5db179946159a82");
|
||||
md5.add("90bc2e21d633fa6c3c47c6bd86c134a0");
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
|
|
@ -72,7 +72,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testGenotypesToVCFUsingHapMapInput() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("f343085305e80c7a2493422e4eaad983");
|
||||
md5.add("bb71dabd072a679cc85fe8d3e130fb2b");
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
|
|
@ -89,7 +89,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testGenotypesToVCFUsingVCFInput() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("b1ddde7efff9c405f8f92f0a636cd919");
|
||||
md5.add("ae39e2249bc20fcd0a668a7fe5fb84b0");
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ package org.broadinstitute.sting.utils.codecs.bcf2;
|
|||
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.BCF2Encoder;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeSuite;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
|
@ -41,7 +41,10 @@ import java.io.ByteArrayInputStream;
|
|||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public class BCF2EncoderDecoderUnitTest extends BaseTest {
|
||||
|
|
@ -172,13 +175,11 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "BCF2EncodingTestProviderSingletons")
|
||||
public Object[][] BCF2EncodingTestProviderSingletons() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
for ( BCF2TypedValue tv : primitives )
|
||||
tests.add(new Object[]{Arrays.asList(tv)});
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
// -----------------------------------------------------------------
|
||||
//
|
||||
// Test encoding of basic types
|
||||
//
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
@DataProvider(name = "BCF2EncodingTestProviderBasicTypes")
|
||||
public Object[][] BCF2EncodingTestProviderBasicTypes() {
|
||||
|
|
@ -188,36 +189,68 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
|
|||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@DataProvider(name = "BCF2EncodingTestProviderSequences")
|
||||
public Object[][] BCF2EncodingTestProviderSequences() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
for ( BCF2TypedValue tv1 : forCombinations )
|
||||
for ( BCF2TypedValue tv2 : forCombinations )
|
||||
for ( BCF2TypedValue tv3 : forCombinations )
|
||||
tests.add(new Object[]{Arrays.asList(tv1, tv2, tv3)});
|
||||
return tests.toArray(new Object[][]{});
|
||||
private interface EncodeMe {
|
||||
public void encode(final BCF2Encoder encoder, final BCF2TypedValue tv) throws IOException;
|
||||
}
|
||||
|
||||
@Test(dataProvider = "BCF2EncodingTestProviderSingletons")
|
||||
public void testBCF2EncodingSingletons(final List<BCF2TypedValue> toEncode) throws IOException {
|
||||
final byte[] record = encodeRecord(toEncode);
|
||||
decodeRecord(toEncode, record);
|
||||
|
||||
@Test(dataProvider = "BCF2EncodingTestProviderBasicTypes")
|
||||
public void testBCF2BasicTypesWithStaticCalls(final List<BCF2TypedValue> toEncode) throws IOException {
|
||||
testBCF2BasicTypesWithEncodeMe(toEncode,
|
||||
new EncodeMe() {
|
||||
@Override
|
||||
public void encode(final BCF2Encoder encoder, final BCF2TypedValue tv) throws IOException {
|
||||
switch ( tv.type ) {
|
||||
case INT8:
|
||||
case INT16:
|
||||
case INT32:
|
||||
encoder.encodeTypedInt((Integer)tv.value, tv.type);
|
||||
break;
|
||||
case FLOAT:
|
||||
encoder.encodeTypedFloat((Double)tv.value);
|
||||
break;
|
||||
case CHAR:
|
||||
encoder.encodeTypedString((String)tv.value);
|
||||
break;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@DataProvider(name = "ListOfStrings")
|
||||
public Object[][] listOfStringsProvider() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
tests.add(new Object[]{Arrays.asList("s1", "s2"), ",s1,s2"});
|
||||
tests.add(new Object[]{Arrays.asList("s1", "s2", "s3"), ",s1,s2,s3"});
|
||||
tests.add(new Object[]{Arrays.asList("s1", "s2", "s3", "s4"), ",s1,s2,s3,s4"});
|
||||
return tests.toArray(new Object[][]{});
|
||||
@Test(dataProvider = "BCF2EncodingTestProviderBasicTypes")
|
||||
public void testBCF2BasicTypesWithObjectType(final List<BCF2TypedValue> toEncode) throws IOException {
|
||||
testBCF2BasicTypesWithEncodeMe(toEncode,
|
||||
new EncodeMe() {
|
||||
@Override
|
||||
public void encode(final BCF2Encoder encoder, final BCF2TypedValue tv) throws IOException {
|
||||
encoder.encodeTyped(tv.value, tv.type);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "ListOfStrings")
|
||||
public void testEncodingListOfString(List<String> strings, String expected) throws IOException {
|
||||
final String collapsed = BCF2Utils.collapseStringList(strings);
|
||||
Assert.assertEquals(collapsed, expected);
|
||||
Assert.assertEquals(BCF2Utils.exploreStringList(collapsed), strings);
|
||||
@Test(dataProvider = "BCF2EncodingTestProviderBasicTypes")
|
||||
public void testBCF2BasicTypesWithObjectNoType(final List<BCF2TypedValue> toEncode) throws IOException {
|
||||
testBCF2BasicTypesWithEncodeMe(toEncode,
|
||||
new EncodeMe() {
|
||||
@Override
|
||||
public void encode(final BCF2Encoder encoder, final BCF2TypedValue tv) throws IOException {
|
||||
encoder.encode(tv.value);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public void testBCF2BasicTypesWithEncodeMe(final List<BCF2TypedValue> toEncode, final EncodeMe func) throws IOException {
|
||||
for ( final BCF2TypedValue tv : toEncode ) {
|
||||
BCF2Encoder encoder = new BCF2Encoder();
|
||||
func.encode(encoder, tv);
|
||||
|
||||
BCF2Decoder decoder = new BCF2Decoder(encoder.getRecordBytes());
|
||||
final Object decoded = decoder.decodeTypedValue();
|
||||
|
||||
Assert.assertNotNull(decoded);
|
||||
Assert.assertFalse(decoded instanceof List);
|
||||
myAssertEquals(tv, decoded);
|
||||
}
|
||||
}
|
||||
|
||||
@Test(dataProvider = "BCF2EncodingTestProviderBasicTypes")
|
||||
|
|
@ -240,30 +273,34 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "BestIntTypeTests")
|
||||
public Object[][] BestIntTypeTests() {
|
||||
@DataProvider(name = "BCF2EncodingTestProviderSingletons")
|
||||
public Object[][] BCF2EncodingTestProviderSingletons() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
tests.add(new Object[]{Arrays.asList(1), BCF2Type.INT8});
|
||||
tests.add(new Object[]{Arrays.asList(1, 10), BCF2Type.INT8});
|
||||
tests.add(new Object[]{Arrays.asList(1, 10, 100), BCF2Type.INT8});
|
||||
tests.add(new Object[]{Arrays.asList(1, -1), BCF2Type.INT8});
|
||||
tests.add(new Object[]{Arrays.asList(1, 1000), BCF2Type.INT16});
|
||||
tests.add(new Object[]{Arrays.asList(1, 1000, 10), BCF2Type.INT16});
|
||||
tests.add(new Object[]{Arrays.asList(1, 1000, 100), BCF2Type.INT16});
|
||||
tests.add(new Object[]{Arrays.asList(1000), BCF2Type.INT16});
|
||||
tests.add(new Object[]{Arrays.asList(100000), BCF2Type.INT32});
|
||||
tests.add(new Object[]{Arrays.asList(100000, 10), BCF2Type.INT32});
|
||||
tests.add(new Object[]{Arrays.asList(100000, 100), BCF2Type.INT32});
|
||||
tests.add(new Object[]{Arrays.asList(100000, 1, -10), BCF2Type.INT32});
|
||||
tests.add(new Object[]{Arrays.asList(-100000, 1, -10), BCF2Type.INT32});
|
||||
for ( BCF2TypedValue tv : primitives )
|
||||
tests.add(new Object[]{Arrays.asList(tv)});
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "BestIntTypeTests")
|
||||
public void determineBestEncoding(final List<Integer> ints, final BCF2Type expectedType) throws IOException {
|
||||
BCF2Encoder encoder = new BCF2Encoder();
|
||||
Assert.assertEquals(encoder.determineIntegerType(ints), expectedType);
|
||||
Assert.assertEquals(encoder.determineIntegerType(ArrayUtils.toPrimitive(ints.toArray(new Integer[0]))), expectedType);
|
||||
@Test(dataProvider = "BCF2EncodingTestProviderSingletons")
|
||||
public void testBCF2EncodingSingletons(final List<BCF2TypedValue> toEncode) throws IOException {
|
||||
final byte[] record = encodeRecord(toEncode);
|
||||
decodeRecord(toEncode, record);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
//
|
||||
// Test encoding of vectors
|
||||
//
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
@DataProvider(name = "BCF2EncodingTestProviderSequences")
|
||||
public Object[][] BCF2EncodingTestProviderSequences() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
for ( BCF2TypedValue tv1 : forCombinations )
|
||||
for ( BCF2TypedValue tv2 : forCombinations )
|
||||
for ( BCF2TypedValue tv3 : forCombinations )
|
||||
tests.add(new Object[]{Arrays.asList(tv1, tv2, tv3)});
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "BCF2EncodingTestProviderBasicTypes")
|
||||
|
|
@ -289,13 +326,72 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
@Test(dataProvider = "BCF2EncodingTestProviderSequences", dependsOnMethods = "testBCF2EncodingSingletons")
|
||||
public void testBCF2EncodingTestProviderSequences(final List<BCF2TypedValue> toEncode) throws IOException {
|
||||
final byte[] record = encodeRecord(toEncode);
|
||||
decodeRecord(toEncode, record);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
//
|
||||
// Test strings and lists of strings
|
||||
//
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
@DataProvider(name = "ListOfStrings")
|
||||
public Object[][] listOfStringsProvider() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
tests.add(new Object[]{Arrays.asList("s1", "s2"), ",s1,s2"});
|
||||
tests.add(new Object[]{Arrays.asList("s1", "s2", "s3"), ",s1,s2,s3"});
|
||||
tests.add(new Object[]{Arrays.asList("s1", "s2", "s3", "s4"), ",s1,s2,s3,s4"});
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "ListOfStrings")
|
||||
public void testEncodingListOfString(List<String> strings, String expected) throws IOException {
|
||||
final String collapsed = BCF2Utils.collapseStringList(strings);
|
||||
Assert.assertEquals(collapsed, expected);
|
||||
Assert.assertEquals(BCF2Utils.exploreStringList(collapsed), strings);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
//
|
||||
// Tests to determine the best type of arrays of integers
|
||||
//
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
@DataProvider(name = "BestIntTypeTests")
|
||||
public Object[][] BestIntTypeTests() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
tests.add(new Object[]{Arrays.asList(1), BCF2Type.INT8});
|
||||
tests.add(new Object[]{Arrays.asList(1, 10), BCF2Type.INT8});
|
||||
tests.add(new Object[]{Arrays.asList(1, 10, 100), BCF2Type.INT8});
|
||||
tests.add(new Object[]{Arrays.asList(1, -1), BCF2Type.INT8});
|
||||
tests.add(new Object[]{Arrays.asList(1, 1000), BCF2Type.INT16});
|
||||
tests.add(new Object[]{Arrays.asList(1, 1000, 10), BCF2Type.INT16});
|
||||
tests.add(new Object[]{Arrays.asList(1, 1000, 100), BCF2Type.INT16});
|
||||
tests.add(new Object[]{Arrays.asList(1000), BCF2Type.INT16});
|
||||
tests.add(new Object[]{Arrays.asList(100000), BCF2Type.INT32});
|
||||
tests.add(new Object[]{Arrays.asList(100000, 10), BCF2Type.INT32});
|
||||
tests.add(new Object[]{Arrays.asList(100000, 100), BCF2Type.INT32});
|
||||
tests.add(new Object[]{Arrays.asList(100000, 1, -10), BCF2Type.INT32});
|
||||
tests.add(new Object[]{Arrays.asList(-100000, 1, -10), BCF2Type.INT32});
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "BestIntTypeTests")
|
||||
public void determineBestEncoding(final List<Integer> ints, final BCF2Type expectedType) throws IOException {
|
||||
BCF2Encoder encoder = new BCF2Encoder();
|
||||
Assert.assertEquals(BCF2Utils.determineIntegerType(ints), expectedType);
|
||||
Assert.assertEquals(BCF2Utils.determineIntegerType(ArrayUtils.toPrimitive(ints.toArray(new Integer[0]))), expectedType);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
//
|
||||
// Tests managing and skipping multiple blocks
|
||||
//
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
@Test(dataProvider = "BCF2EncodingTestProviderSequences", dependsOnMethods = "testBCF2EncodingTestProviderSequences")
|
||||
public void testReadAndSkipWithMultipleBlocks(final List<BCF2TypedValue> block) throws IOException {
|
||||
testReadAndSkipWithMultipleBlocks(block, forCombinations);
|
||||
|
|
@ -337,6 +433,82 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
|
|||
decodeRecord(block2, decoder);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
//
|
||||
// Test encoding / decoding arrays of ints
|
||||
//
|
||||
// This checks that we can encode and decode correctly with the
|
||||
// low-level decodeIntArray function arrays of values. This
|
||||
// has to be pretty comprehensive as decodeIntArray is a highly optimized
|
||||
// piece of code with lots of edge cases. The values we are encoding
|
||||
// don't really matter -- just that the values come back as expected.
|
||||
//
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
@DataProvider(name = "IntArrays")
|
||||
public Object[][] makeIntArrays() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
for ( int nValues : Arrays.asList(0, 1, 2, 5, 10, 100) ) {
|
||||
for ( int nPad : Arrays.asList(0, 1, 2, 5, 10, 100) ) {
|
||||
int nElements = nValues + nPad;
|
||||
|
||||
List<Integer> values = new ArrayList<Integer>(nElements);
|
||||
|
||||
// add nValues from 0 to nValues - 1
|
||||
for ( int i = 0; i < nValues; i++ )
|
||||
values.add(i);
|
||||
|
||||
// add nPad nulls
|
||||
for ( int i = 0; i < nPad; i++ )
|
||||
values.add(null);
|
||||
|
||||
tests.add(new Object[]{values});
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "IntArrays")
|
||||
public void testIntArrays(final List<Integer> ints) throws IOException {
|
||||
final BCF2Encoder encoder = new BCF2Encoder();
|
||||
encoder.encodeTyped(ints, BCF2Type.INT16);
|
||||
|
||||
final BCF2Decoder decoder = new BCF2Decoder(encoder.getRecordBytes());
|
||||
|
||||
final byte typeDescriptor = decoder.readTypeDescriptor();
|
||||
|
||||
// read the int[] with the low-level version
|
||||
final int[] decoded = decoder.decodeIntArray(typeDescriptor);
|
||||
|
||||
if ( isMissing(ints) ) {
|
||||
// we expect that the result is null in this case
|
||||
Assert.assertNull(decoded, "Encoded all missing values -- expected null");
|
||||
} else {
|
||||
// we expect at least some values to come back
|
||||
Assert.assertTrue(decoded.length > 0, "Must have at least 1 element for non-null encoded data");
|
||||
|
||||
// check corresponding values
|
||||
for ( int i = 0; i < ints.size(); i++ ) {
|
||||
final Integer expected = ints.get(i);
|
||||
|
||||
if ( expected == null ) {
|
||||
Assert.assertTrue(decoded.length <= i, "we expect decoded to be truncated for missing values");
|
||||
} else {
|
||||
Assert.assertTrue(decoded.length > i, "we expected at least " + i + " values in decoded array");
|
||||
Assert.assertEquals(decoded[i], (int)expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
//
|
||||
// Helper routines
|
||||
//
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
private final byte[] combineRecords(final byte[] record1, final byte[] record2) throws IOException {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
baos.write(record1);
|
||||
|
|
@ -392,4 +564,12 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
|
|||
} else
|
||||
Assert.assertEquals(decoded, tv.value);
|
||||
}
|
||||
|
||||
private final boolean isMissing(final List<Integer> values) {
|
||||
if ( values != null )
|
||||
for ( Integer value : values )
|
||||
if ( value != null )
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue