A bunch of changes needed to make outputting pooled calls possible

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2073 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-11-18 18:42:57 +00:00
parent 0a35c8e0ba
commit a70cf2b763
8 changed files with 74 additions and 19 deletions

View File

@ -60,7 +60,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
protected void initialize(Set<String> samples,
Logger logger,
UnifiedArgumentCollection UAC) {
this.samples = samples;
this.samples = new TreeSet<String>(samples);
this.logger = logger;
baseModel = UAC.baseModel;
heterozygosity = UAC.heterozygosity;

View File

@ -254,6 +254,9 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
if ( locusdata instanceof ConfidenceBacked ) {
((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence);
}
if ( locusdata instanceof AlternateAlleleBacked ) {
((AlternateAlleleBacked)locusdata).setAlternateAllele(baseOfMax);
}
if ( locusdata instanceof AlleleFrequencyBacked ) {
((AlleleFrequencyBacked)locusdata).setAlleleFrequency((double)bestAFguess / (double)(frequencyEstimationPoints-1));
// frequenc range doesn't make sense for single samples

View File

@ -3,7 +3,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import java.util.*;
@ -89,9 +88,10 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode
}
}
verboseWriter.printf("POOL_DEBUG %s %c %c %d %d %d %.2f %.2f %.2f %f%n",
context.getContext(StratifiedContext.OVERALL).getLocation(),
refArg, altArg, nChromosomes, nAltAlleles, nRefAlleles, f, log10POfRef, log10POfAlt, log10L);
if ( verboseWriter != null )
verboseWriter.printf("POOL_DEBUG %s %c %c %d %d %d %.2f %.2f %.2f %f%n",
context.getContext(StratifiedContext.OVERALL).getLocation(),
refArg, altArg, nChromosomes, nAltAlleles, nRefAlleles, f, log10POfRef, log10POfAlt, log10L);
return log10L;
}

View File

@ -119,8 +119,8 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeL
}
// print them out for debugging (need separate loop to ensure uniqueness)
for ( String sample : samples )
logger.debug("SAMPLE: " + sample);
// for ( String sample : samples )
// logger.debug("SAMPLE: " + sample);
gcm = GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC);
@ -129,7 +129,12 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeL
if ( VARIANTS_FILE == null && out == null )
return;
// if we got here, then we were instantiated by the GATK engine
// *** If we got here, then we were instantiated by the GATK engine ***
// if we're in POOLED mode, we no longer want the sample names
// (although they're still stored in the gcm if needed)
if ( UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED )
samples.clear();
// create the output writer stream
if ( VARIANTS_FILE != null )
@ -205,7 +210,8 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeL
callsMetrics.nCalledBases++;
// can't make a confident variant call here
if ( value.first == null || value.first.size() == 0 ) {
if ( value.first == null ||
(UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED && value.first.size() == 0) ) {
callsMetrics.nNonConfidentCalls++;
return sum;
}
@ -213,7 +219,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeL
callsMetrics.nConfidentCalls++;
// if we have a single-sample call (single sample from PointEstimate model returns no genotype locus data)
if ( value.second == null || (!writer.supportsMultiSample() && samples.size() == 1) ) {
if ( value.second == null || (!writer.supportsMultiSample() && samples.size() <= 1) ) {
writer.addGenotypeCall(value.first.get(0));
}

View File

@ -0,0 +1,24 @@
package org.broadinstitute.sting.utils.genotype;
/**
* @author ebanks
* Interface AlternateAlleleBacked
*
* this interface indicates that the genotype is
* backed up by alternate allele information.
*/
public interface AlternateAlleleBacked {
/**
*
* @return returns the alternate allele for this genotype
*/
public char getAlternateAllele();
/**
*
* @param alt the alternate allele base for this genotype
*/
public void setAlternateAllele(char alt);
}

View File

@ -13,7 +13,7 @@ import java.util.Map;
* <p/>
* represents the meta data for a genotype object.
*/
public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked, SLODBacked, IDBacked, AlleleFrequencyBacked, ArbitraryFieldsBacked {
public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked, SLODBacked, IDBacked, AlternateAlleleBacked, AlleleFrequencyBacked, ArbitraryFieldsBacked {
// the discovery lod score
private double mConfidence = 0.0;
@ -28,8 +28,9 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked
// the location
private GenomeLoc mLoc;
// the ref base
// the ref base and alt bases
private char mRefBase;
private char mAltBase = 'N';
// the id
private String mID;
@ -65,6 +66,22 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked
return mLoc;
}
/**
*
* @return returns the alternate allele for this genotype
*/
public char getAlternateAllele() {
return mAltBase;
}
/**
*
* @param alt the alternate allele base for this genotype
*/
public void setAlternateAllele(char alt) {
mAltBase = alt;
}
/**
* get the confidence
*

View File

@ -116,13 +116,18 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
VCFParameters params = new VCFParameters();
params.addFormatItem("GT");
// mapping of our sample names to genotypes
if (genotypes.size() < 1) {
throw new IllegalArgumentException("Unable to parse out the current location: genotype array must contain at least one entry");
}
// get the location and reference
params.setLocations(genotypes.get(0).getLocation(), genotypes.get(0).getReference());
if ( genotypes.size() == 0 ) {
if ( locusdata == null )
throw new IllegalArgumentException("Unable to parse out the current location: genotype array must contain at least one entry or have locusdata");
params.setLocations(locusdata.getLocation(), locusdata.getReference());
// if there is no genotype data, we'll also need to set an alternate allele
params.addAlternateBase(new VCFGenotypeEncoding(String.valueOf(((VCFGenotypeLocusData)locusdata).getAlternateAllele())));
} else {
params.setLocations(genotypes.get(0).getLocation(), genotypes.get(0).getReference());
}
Map<String, VCFGenotypeCall> genotypeMap = genotypeListToSampleNameMap(genotypes);

View File

@ -33,7 +33,7 @@ class VCFParameters {
} else {
if (!contig.equals(this.contig))
throw new IllegalArgumentException("The contig name has to be the same at a single locus");
if (position != this.position)
if (location.getStart() != this.position)
throw new IllegalArgumentException("The position has to be the same at a single locus");
if (refBase != this.referenceBase)
throw new IllegalArgumentException("The reference base name has to be the same at a single locus");