Because walkers call UG's map function, we need to move the actual writing out
to UG's reduce function. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1845 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
825e6c7a4d
commit
e740e7a7ce
|
|
@ -16,13 +16,10 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
||||||
// We consider the EM stable when the MAF doesn't change more than 1/10N
|
// We consider the EM stable when the MAF doesn't change more than 1/10N
|
||||||
protected static final double EM_STABILITY_METRIC = 0.1;
|
protected static final double EM_STABILITY_METRIC = 0.1;
|
||||||
|
|
||||||
// keep track of some metrics about our calls
|
|
||||||
protected CallMetrics callsMetrics = new CallMetrics();
|
|
||||||
|
|
||||||
|
|
||||||
protected EMGenotypeCalculationModel() {}
|
protected EMGenotypeCalculationModel() {}
|
||||||
|
|
||||||
public List<GenotypeCall> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
|
public Pair<List<GenotypeCall>, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
|
||||||
|
|
||||||
// keep track of the context for each sample, overall and separated by strand
|
// keep track of the context for each sample, overall and separated by strand
|
||||||
int[] baseCounts = new int[4];
|
int[] baseCounts = new int[4];
|
||||||
|
|
@ -51,23 +48,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
||||||
|
|
||||||
// generate the calls
|
// generate the calls
|
||||||
GenotypeMetaData metadata = new GenotypeMetaData(lod, strandScore, overall.getMAF());
|
GenotypeMetaData metadata = new GenotypeMetaData(lod, strandScore, overall.getMAF());
|
||||||
List<GenotypeCall> calls = genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts);
|
return new Pair<List<GenotypeCall>, GenotypeMetaData>(genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts), metadata);
|
||||||
if ( calls != null && calls.size() != 0 ) {
|
|
||||||
|
|
||||||
// use multi-sample mode if we have multiple samples or the output type allows it
|
|
||||||
if ( out.supportsMultiSample() || samples.size() > 1 ) {
|
|
||||||
|
|
||||||
// annoying hack to get around Java generics
|
|
||||||
ArrayList<Genotype> callList = new ArrayList<Genotype>();
|
|
||||||
for ( GenotypeCall call : calls )
|
|
||||||
callList.add(call);
|
|
||||||
|
|
||||||
out.addMultiSampleCall(callList, metadata);
|
|
||||||
} else {
|
|
||||||
out.addGenotypeCall(calls.get(0));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return calls;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected List<GenotypeCall> genotypeCallsFromGenotypeLikelihoods(EMOutput results, char ref, HashMap<String, AlignmentContextBySample> contexts) {
|
protected List<GenotypeCall> genotypeCallsFromGenotypeLikelihoods(EMOutput results, char ref, HashMap<String, AlignmentContextBySample> contexts) {
|
||||||
|
|
@ -76,7 +57,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
||||||
// an optimization
|
// an optimization
|
||||||
double expectedChromosomes = 2.0 * (double)GLs.size() * results.getMAF();
|
double expectedChromosomes = 2.0 * (double)GLs.size() * results.getMAF();
|
||||||
if ( expectedChromosomes < 1.0 )
|
if ( expectedChromosomes < 1.0 )
|
||||||
return null;
|
return new ArrayList<GenotypeCall>();
|
||||||
|
|
||||||
ArrayList<GenotypeCall> calls = new ArrayList<GenotypeCall>();
|
ArrayList<GenotypeCall> calls = new ArrayList<GenotypeCall>();
|
||||||
int variantCalls = 0;
|
int variantCalls = 0;
|
||||||
|
|
@ -113,8 +94,6 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
||||||
// get the initial genotype likelihoods
|
// get the initial genotype likelihoods
|
||||||
HashMap<String, GenotypeLikelihoods> GLs = initializeGenotypeLikelihoods(ref, contexts, alleleFrequencies, priors, contextType);
|
HashMap<String, GenotypeLikelihoods> GLs = initializeGenotypeLikelihoods(ref, contexts, alleleFrequencies, priors, contextType);
|
||||||
|
|
||||||
callsMetrics.nCalledBases++;
|
|
||||||
|
|
||||||
// The EM loop:
|
// The EM loop:
|
||||||
// we want to continue until the calculation is stable, but we need some max on the number of iterations
|
// we want to continue until the calculation is stable, but we need some max on the number of iterations
|
||||||
int iterations = 0;
|
int iterations = 0;
|
||||||
|
|
@ -233,23 +212,6 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A class to keep track of some basic metrics about our calls
|
|
||||||
*/
|
|
||||||
protected class CallMetrics {
|
|
||||||
long nConfidentCalls = 0;
|
|
||||||
long nNonConfidentCalls = 0;
|
|
||||||
long nCalledBases = 0;
|
|
||||||
|
|
||||||
CallMetrics() {}
|
|
||||||
|
|
||||||
public String toString() {
|
|
||||||
return String.format("UG: %d confident and %d non-confident calls were made at %d bases",
|
|
||||||
nConfidentCalls, nNonConfidentCalls, nCalledBases);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A class to keep track of the EM output
|
* A class to keep track of the EM output
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,8 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
import org.broadinstitute.sting.utils.genotype.GenotypeMetaData;
|
||||||
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -20,7 +21,6 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
||||||
|
|
||||||
protected BaseMismatchModel baseModel;
|
protected BaseMismatchModel baseModel;
|
||||||
protected Set<String> samples;
|
protected Set<String> samples;
|
||||||
protected GenotypeWriter out;
|
|
||||||
protected Logger logger;
|
protected Logger logger;
|
||||||
protected double heterozygosity;
|
protected double heterozygosity;
|
||||||
protected EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform defaultPlatform;
|
protected EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform defaultPlatform;
|
||||||
|
|
@ -40,16 +40,13 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
||||||
* Initialize the GenotypeCalculationModel object
|
* Initialize the GenotypeCalculationModel object
|
||||||
* Assumes that out is not null
|
* Assumes that out is not null
|
||||||
* @param samples samples in input bam
|
* @param samples samples in input bam
|
||||||
* @param out output writer
|
|
||||||
* @param logger logger
|
* @param logger logger
|
||||||
* @param UAC unified arg collection
|
* @param UAC unified arg collection
|
||||||
*/
|
*/
|
||||||
protected void initialize(Set<String> samples,
|
protected void initialize(Set<String> samples,
|
||||||
GenotypeWriter out,
|
|
||||||
Logger logger,
|
Logger logger,
|
||||||
UnifiedArgumentCollection UAC) {
|
UnifiedArgumentCollection UAC) {
|
||||||
this.samples = samples;
|
this.samples = samples;
|
||||||
this.out = out;
|
|
||||||
this.logger = logger;
|
this.logger = logger;
|
||||||
baseModel = UAC.baseModel;
|
baseModel = UAC.baseModel;
|
||||||
heterozygosity = UAC.heterozygosity;
|
heterozygosity = UAC.heterozygosity;
|
||||||
|
|
@ -69,7 +66,6 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
||||||
protected Object clone() throws CloneNotSupportedException {
|
protected Object clone() throws CloneNotSupportedException {
|
||||||
GenotypeCalculationModel gcm = (GenotypeCalculationModel)super.clone();
|
GenotypeCalculationModel gcm = (GenotypeCalculationModel)super.clone();
|
||||||
gcm.samples = new HashSet<String>(samples);
|
gcm.samples = new HashSet<String>(samples);
|
||||||
gcm.out = out;
|
|
||||||
gcm.logger = logger;
|
gcm.logger = logger;
|
||||||
gcm.baseModel = baseModel;
|
gcm.baseModel = baseModel;
|
||||||
gcm.heterozygosity = heterozygosity;
|
gcm.heterozygosity = heterozygosity;
|
||||||
|
|
@ -89,10 +85,10 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
||||||
* @param context alignment context
|
* @param context alignment context
|
||||||
* @param priors priors to use for GL
|
* @param priors priors to use for GL
|
||||||
*
|
*
|
||||||
* @return list of calls
|
* @return calls
|
||||||
*/
|
*/
|
||||||
public abstract List<GenotypeCall> calculateGenotype(RefMetaDataTracker tracker,
|
public abstract Pair<List<GenotypeCall>, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker,
|
||||||
char ref,
|
char ref,
|
||||||
AlignmentContext context,
|
AlignmentContext context,
|
||||||
DiploidGenotypePriors priors);
|
DiploidGenotypePriors priors);
|
||||||
}
|
}
|
||||||
|
|
@ -26,7 +26,6 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import static org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCalculationModel.Model.*;
|
import static org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCalculationModel.Model.*;
|
||||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
@ -48,12 +47,10 @@ public class GenotypeCalculationModelFactory {
|
||||||
*
|
*
|
||||||
* @param UAC The unified argument collection
|
* @param UAC The unified argument collection
|
||||||
* @param samples samples in bam
|
* @param samples samples in bam
|
||||||
* @param out output writer
|
|
||||||
* @param logger logger
|
* @param logger logger
|
||||||
* @return model
|
* @return model
|
||||||
*/
|
*/
|
||||||
public static GenotypeCalculationModel makeGenotypeCalculation(Set<String> samples,
|
public static GenotypeCalculationModel makeGenotypeCalculation(Set<String> samples,
|
||||||
GenotypeWriter out,
|
|
||||||
Logger logger,
|
Logger logger,
|
||||||
UnifiedArgumentCollection UAC) {
|
UnifiedArgumentCollection UAC) {
|
||||||
GenotypeCalculationModel gcm;
|
GenotypeCalculationModel gcm;
|
||||||
|
|
@ -67,7 +64,7 @@ public class GenotypeCalculationModelFactory {
|
||||||
default: throw new RuntimeException("Unexpected GenotypeCalculationModel " + UAC.genotypeModel);
|
default: throw new RuntimeException("Unexpected GenotypeCalculationModel " + UAC.genotypeModel);
|
||||||
}
|
}
|
||||||
|
|
||||||
gcm.initialize(samples, out, logger, UAC);
|
gcm.initialize(samples, logger, UAC);
|
||||||
return gcm;
|
return gcm;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.GenotypeMetaData;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -12,7 +13,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
||||||
protected PointEstimateGenotypeCalculationModel() {}
|
protected PointEstimateGenotypeCalculationModel() {}
|
||||||
|
|
||||||
// overload this method so we can special-case the single sample
|
// overload this method so we can special-case the single sample
|
||||||
public List<GenotypeCall> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
|
public Pair<List<GenotypeCall>, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
|
||||||
|
|
||||||
// we don't actually want to run EM for single samples
|
// we don't actually want to run EM for single samples
|
||||||
if ( samples.size() == 1 ) {
|
if ( samples.size() == 1 ) {
|
||||||
|
|
@ -30,22 +31,10 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
||||||
if ( sampleContext == null )
|
if ( sampleContext == null )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
callsMetrics.nCalledBases++;
|
|
||||||
|
|
||||||
// create the genotype call object
|
// create the genotype call object
|
||||||
Pair<ReadBackedPileup, GenotypeLikelihoods> discoveryGL = getSingleSampleLikelihoods(ref, sampleContext, priors, StratifiedContext.OVERALL);
|
Pair<ReadBackedPileup, GenotypeLikelihoods> discoveryGL = getSingleSampleLikelihoods(ref, sampleContext, priors, StratifiedContext.OVERALL);
|
||||||
GenotypeCall call = new GenotypeCall(sample, context.getLocation(), ref, discoveryGL.second, discoveryGL.first);
|
GenotypeCall call = new GenotypeCall(sample, context.getLocation(), ref, discoveryGL.second, discoveryGL.first);
|
||||||
|
return new Pair<List<GenotypeCall>, GenotypeMetaData>(Arrays.asList(call), null);
|
||||||
if ( GENOTYPE_MODE || call.isVariant(call.getReference()) ) {
|
|
||||||
double confidence = (GENOTYPE_MODE ? call.getNegLog10PError() : call.toVariation().getNegLog10PError());
|
|
||||||
if ( confidence >= LOD_THRESHOLD ) {
|
|
||||||
callsMetrics.nConfidentCalls++;
|
|
||||||
out.addGenotypeCall(call);
|
|
||||||
} else {
|
|
||||||
callsMetrics.nNonConfidentCalls++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return Arrays.asList(call);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return super.calculateGenotype(tracker, ref, context, priors);
|
return super.calculateGenotype(tracker, ref, context, priors);
|
||||||
|
|
|
||||||
|
|
@ -36,18 +36,22 @@ import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
|
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.ArgumentCollection;
|
import org.broadinstitute.sting.utils.cmdLine.ArgumentCollection;
|
||||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
|
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.GenotypeMetaData;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
|
||||||
@ReadFilters({ZeroMappingQualityReadFilter.class, MissingReadGroupFilter.class})
|
@ReadFilters({ZeroMappingQualityReadFilter.class, MissingReadGroupFilter.class})
|
||||||
public class UnifiedGenotyper extends LocusWalker<List<GenotypeCall>, Integer> {
|
public class UnifiedGenotyper extends LocusWalker<Pair<List<GenotypeCall>, GenotypeMetaData>, Integer> {
|
||||||
|
|
||||||
@ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
|
@ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
|
||||||
|
|
||||||
|
|
@ -65,6 +69,11 @@ public class UnifiedGenotyper extends LocusWalker<List<GenotypeCall>, Integer> {
|
||||||
// output writer
|
// output writer
|
||||||
private GenotypeWriter writer;
|
private GenotypeWriter writer;
|
||||||
|
|
||||||
|
// samples in input
|
||||||
|
private HashSet<String> samples;
|
||||||
|
|
||||||
|
// keep track of some metrics about our calls
|
||||||
|
private CallMetrics callsMetrics;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Filter out loci to ignore (at an ambiguous base in the reference or a locus with zero coverage).
|
* Filter out loci to ignore (at an ambiguous base in the reference or a locus with zero coverage).
|
||||||
|
|
@ -86,7 +95,7 @@ public class UnifiedGenotyper extends LocusWalker<List<GenotypeCall>, Integer> {
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
|
|
||||||
// get all of the unique sample names
|
// get all of the unique sample names
|
||||||
HashSet<String> samples = new HashSet<String>();
|
samples = new HashSet<String>();
|
||||||
List<SAMReadGroupRecord> readGroups = getToolkit().getSAMFileHeader().getReadGroups();
|
List<SAMReadGroupRecord> readGroups = getToolkit().getSAMFileHeader().getReadGroups();
|
||||||
for ( SAMReadGroupRecord readGroup : readGroups )
|
for ( SAMReadGroupRecord readGroup : readGroups )
|
||||||
samples.add(readGroup.getSample());
|
samples.add(readGroup.getSample());
|
||||||
|
|
@ -104,7 +113,8 @@ public class UnifiedGenotyper extends LocusWalker<List<GenotypeCall>, Integer> {
|
||||||
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out, "UnifiedGenotyper",
|
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out, "UnifiedGenotyper",
|
||||||
this.getToolkit().getArguments().referenceFile.getName());
|
this.getToolkit().getArguments().referenceFile.getName());
|
||||||
|
|
||||||
gcm = GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, writer, logger, UAC);
|
gcm = GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC);
|
||||||
|
callsMetrics = new CallMetrics();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -114,7 +124,7 @@ public class UnifiedGenotyper extends LocusWalker<List<GenotypeCall>, Integer> {
|
||||||
* @param refContext the reference base
|
* @param refContext the reference base
|
||||||
* @param context contextual information around the locus
|
* @param context contextual information around the locus
|
||||||
*/
|
*/
|
||||||
public List<GenotypeCall> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) {
|
public Pair<List<GenotypeCall>, GenotypeMetaData> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) {
|
||||||
char ref = Character.toUpperCase(refContext.getBase());
|
char ref = Character.toUpperCase(refContext.getBase());
|
||||||
if ( !BaseUtils.isRegularBase(ref) )
|
if ( !BaseUtils.isRegularBase(ref) )
|
||||||
return null;
|
return null;
|
||||||
|
|
@ -151,7 +161,47 @@ public class UnifiedGenotyper extends LocusWalker<List<GenotypeCall>, Integer> {
|
||||||
|
|
||||||
public Integer reduceInit() { return 0; }
|
public Integer reduceInit() { return 0; }
|
||||||
|
|
||||||
public Integer reduce(List<GenotypeCall> value, Integer sum) {
|
public Integer reduce(Pair<List<GenotypeCall>, GenotypeMetaData> value, Integer sum) {
|
||||||
|
if ( value == null || value.first == null )
|
||||||
|
return sum;
|
||||||
|
|
||||||
|
callsMetrics.nCalledBases++;
|
||||||
|
|
||||||
|
if ( value.first.size() == 0 )
|
||||||
|
return sum;
|
||||||
|
|
||||||
|
// special-case for single-sample using PointEstimate model
|
||||||
|
if ( value.second == null ) {
|
||||||
|
GenotypeCall call = value.first.get(0);
|
||||||
|
if ( UAC.GENOTYPE || call.isVariant(call.getReference()) ) {
|
||||||
|
double confidence = (UAC.GENOTYPE ? call.getNegLog10PError() : call.toVariation().getNegLog10PError());
|
||||||
|
if ( confidence >= UAC.LOD_THRESHOLD ) {
|
||||||
|
callsMetrics.nConfidentCalls++;
|
||||||
|
writer.addGenotypeCall(call);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
callsMetrics.nNonConfidentCalls++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// use multi-sample mode if we have multiple samples or the output type allows it
|
||||||
|
else if ( writer.supportsMultiSample() || samples.size() > 1 ) {
|
||||||
|
|
||||||
|
// annoying hack to get around Java generics
|
||||||
|
ArrayList<Genotype> callList = new ArrayList<Genotype>();
|
||||||
|
for ( GenotypeCall call : value.first )
|
||||||
|
callList.add(call);
|
||||||
|
|
||||||
|
callsMetrics.nConfidentCalls++;
|
||||||
|
writer.addMultiSampleCall(callList, value.second);
|
||||||
|
}
|
||||||
|
|
||||||
|
// otherwise, use single sample mode
|
||||||
|
else {
|
||||||
|
callsMetrics.nConfidentCalls++;
|
||||||
|
writer.addGenotypeCall(value.first.get(0));
|
||||||
|
}
|
||||||
|
|
||||||
return sum + 1;
|
return sum + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -160,4 +210,20 @@ public class UnifiedGenotyper extends LocusWalker<List<GenotypeCall>, Integer> {
|
||||||
logger.info("Processed " + sum + " loci that are callable for SNPs");
|
logger.info("Processed " + sum + " loci that are callable for SNPs");
|
||||||
writer.close();
|
writer.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A class to keep track of some basic metrics about our calls
|
||||||
|
*/
|
||||||
|
protected class CallMetrics {
|
||||||
|
long nConfidentCalls = 0;
|
||||||
|
long nNonConfidentCalls = 0;
|
||||||
|
long nCalledBases = 0;
|
||||||
|
|
||||||
|
CallMetrics() {}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return String.format("UG: %d confident and %d non-confident calls were made at %d bases",
|
||||||
|
nConfidentCalls, nNonConfidentCalls, nCalledBases);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.GenotypeMetaData;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
|
|
@ -80,8 +81,10 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Referen
|
||||||
|
|
||||||
|
|
||||||
public boolean baseIsConfidentRef( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
public boolean baseIsConfidentRef( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
||||||
List<GenotypeCall> calls = ug.map(tracker,ref,context);
|
Pair<List<GenotypeCall>, GenotypeMetaData> calls = ug.map(tracker,ref,context);
|
||||||
return ( ! calls.get(0).isVariant(ref.getBase()) && calls.get(0).getNegLog10PError() >= confidentRefThreshold );
|
if (calls == null)
|
||||||
|
return false;
|
||||||
|
return ( ! calls.first.get(0).isVariant(ref.getBase()) && calls.first.get(0).getNegLog10PError() >= confidentRefThreshold );
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@ import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.ListUtils;
|
import org.broadinstitute.sting.utils.ListUtils;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
import org.broadinstitute.sting.utils.genotype.*;
|
import org.broadinstitute.sting.utils.genotype.*;
|
||||||
|
|
||||||
|
|
@ -80,9 +81,9 @@ public class CoverageEvalWalker extends LocusWalker<List<String>, String> {
|
||||||
List<Integer> sub_offsets = ListUtils.sliceListByIndices(subset_indices, offsets);
|
List<Integer> sub_offsets = ListUtils.sliceListByIndices(subset_indices, offsets);
|
||||||
|
|
||||||
AlignmentContext subContext = new AlignmentContext(context.getLocation(), sub_reads, sub_offsets);
|
AlignmentContext subContext = new AlignmentContext(context.getLocation(), sub_reads, sub_offsets);
|
||||||
List<GenotypeCall> calls = UG.map(tracker, ref, subContext);
|
Pair<List<GenotypeCall>, GenotypeMetaData> calls = UG.map(tracker, ref, subContext);
|
||||||
if (calls != null) {
|
if (calls != null && calls.first != null) {
|
||||||
GenotypeCall call = calls.get(0);
|
GenotypeCall call = calls.first.get(0);
|
||||||
String callType = (call.isVariant(call.getReference())) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference";
|
String callType = (call.isVariant(call.getReference())) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference";
|
||||||
GenotypeCalls.add(coverage+"\t"+coverage_available+"\t"+hc_genotype+"\t"+callType+"\t"+toGeliString(call));
|
GenotypeCalls.add(coverage+"\t"+coverage_available+"\t"+hc_genotype+"\t"+callType+"\t"+toGeliString(call));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,8 @@ import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCall;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCall;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyper;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyper;
|
||||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.GenotypeMetaData;
|
||||||
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
@ -70,14 +72,14 @@ public class DeNovoSNPWalker extends RefWalker<String, Integer>{
|
||||||
}
|
}
|
||||||
|
|
||||||
AlignmentContext parent1_subContext = new AlignmentContext(context.getLocation(), parent1_reads, parent1_offsets);
|
AlignmentContext parent1_subContext = new AlignmentContext(context.getLocation(), parent1_reads, parent1_offsets);
|
||||||
List<GenotypeCall> parent1 = UG.map(tracker, ref, parent1_subContext);
|
Pair<List<GenotypeCall>, GenotypeMetaData> parent1 = UG.map(tracker, ref, parent1_subContext);
|
||||||
|
|
||||||
AlignmentContext parent2_subContext = new AlignmentContext(context.getLocation(), parent2_reads, parent2_offsets);
|
AlignmentContext parent2_subContext = new AlignmentContext(context.getLocation(), parent2_reads, parent2_offsets);
|
||||||
List<GenotypeCall> parent2 = UG.map(tracker, ref, parent2_subContext);
|
Pair<List<GenotypeCall>, GenotypeMetaData> parent2 = UG.map(tracker, ref, parent2_subContext);
|
||||||
|
|
||||||
if ( parent1 != null && parent2 != null ) {
|
if ( parent1 != null && parent1.first != null && parent2 != null && parent2.first != null ) {
|
||||||
GenotypeCall parent1call = parent1.get(0);
|
GenotypeCall parent1call = parent1.first.get(0);
|
||||||
GenotypeCall parent2call = parent2.get(0);
|
GenotypeCall parent2call = parent2.first.get(0);
|
||||||
|
|
||||||
if (!parent1call.isVariant(parent1call.getReference()) &&
|
if (!parent1call.isVariant(parent1call.getReference()) &&
|
||||||
parent1call.getNegLog10PError() > 5 &&
|
parent1call.getNegLog10PError() > 5 &&
|
||||||
|
|
|
||||||
|
|
@ -5,9 +5,9 @@ import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyper;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.*;
|
||||||
import org.broadinstitute.sting.utils.Pair;
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
import org.broadinstitute.sting.utils.genotype.*;
|
||||||
|
|
||||||
import java.io.PrintWriter;
|
import java.io.PrintWriter;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
@ -257,9 +257,9 @@ public class ArtificialPoolContext {
|
||||||
public Genotype getGenotype(int group) {
|
public Genotype getGenotype(int group) {
|
||||||
AlignmentContext alicon = this.getAlignmentContext();
|
AlignmentContext alicon = this.getAlignmentContext();
|
||||||
Pair<List<SAMRecord>[],List<Integer>[]> byGroupSplitPair = this.splitByGroup(alicon.getReads(),alicon.getOffsets());
|
Pair<List<SAMRecord>[],List<Integer>[]> byGroupSplitPair = this.splitByGroup(alicon.getReads(),alicon.getOffsets());
|
||||||
List<? extends Genotype> result = ug.map(this.getRefMetaDataTracker(),this.getReferenceContext(),
|
Pair<List<GenotypeCall>, GenotypeMetaData> result = ug.map(this.getRefMetaDataTracker(),this.getReferenceContext(),
|
||||||
new AlignmentContext(this.getAlignmentContext().getLocation(), byGroupSplitPair.first[group],byGroupSplitPair.second[group]));
|
new AlignmentContext(this.getAlignmentContext().getLocation(), byGroupSplitPair.first[group],byGroupSplitPair.second[group]));
|
||||||
return (result == null ? null : result.get(0));
|
return (result.first == null ? null : result.first.get(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static List<SAMRecord>[] sampleReads(List<SAMRecord>[] reads, double[] propEstGlobal) {
|
public static List<SAMRecord>[] sampleReads(List<SAMRecord>[] reads, double[] propEstGlobal) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue