A bunch of changes needed to make outputting pooled calls possible

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2073 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-11-18 18:42:57 +00:00
parent 0a35c8e0ba
commit a70cf2b763
8 changed files with 74 additions and 19 deletions

View File

@ -60,7 +60,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
protected void initialize(Set<String> samples, protected void initialize(Set<String> samples,
Logger logger, Logger logger,
UnifiedArgumentCollection UAC) { UnifiedArgumentCollection UAC) {
this.samples = samples; this.samples = new TreeSet<String>(samples);
this.logger = logger; this.logger = logger;
baseModel = UAC.baseModel; baseModel = UAC.baseModel;
heterozygosity = UAC.heterozygosity; heterozygosity = UAC.heterozygosity;

View File

@ -254,6 +254,9 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
if ( locusdata instanceof ConfidenceBacked ) { if ( locusdata instanceof ConfidenceBacked ) {
((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence); ((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence);
} }
if ( locusdata instanceof AlternateAlleleBacked ) {
((AlternateAlleleBacked)locusdata).setAlternateAllele(baseOfMax);
}
if ( locusdata instanceof AlleleFrequencyBacked ) { if ( locusdata instanceof AlleleFrequencyBacked ) {
((AlleleFrequencyBacked)locusdata).setAlleleFrequency((double)bestAFguess / (double)(frequencyEstimationPoints-1)); ((AlleleFrequencyBacked)locusdata).setAlleleFrequency((double)bestAFguess / (double)(frequencyEstimationPoints-1));
// frequenc range doesn't make sense for single samples // frequenc range doesn't make sense for single samples

View File

@ -3,7 +3,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.ReadBackedPileup; import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.QualityUtils;
import java.util.*; import java.util.*;
@ -89,9 +88,10 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode
} }
} }
verboseWriter.printf("POOL_DEBUG %s %c %c %d %d %d %.2f %.2f %.2f %f%n", if ( verboseWriter != null )
context.getContext(StratifiedContext.OVERALL).getLocation(), verboseWriter.printf("POOL_DEBUG %s %c %c %d %d %d %.2f %.2f %.2f %f%n",
refArg, altArg, nChromosomes, nAltAlleles, nRefAlleles, f, log10POfRef, log10POfAlt, log10L); context.getContext(StratifiedContext.OVERALL).getLocation(),
refArg, altArg, nChromosomes, nAltAlleles, nRefAlleles, f, log10POfRef, log10POfAlt, log10L);
return log10L; return log10L;
} }

View File

@ -119,8 +119,8 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeL
} }
// print them out for debugging (need separate loop to ensure uniqueness) // print them out for debugging (need separate loop to ensure uniqueness)
for ( String sample : samples ) // for ( String sample : samples )
logger.debug("SAMPLE: " + sample); // logger.debug("SAMPLE: " + sample);
gcm = GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC); gcm = GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC);
@ -129,7 +129,12 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeL
if ( VARIANTS_FILE == null && out == null ) if ( VARIANTS_FILE == null && out == null )
return; return;
// if we got here, then we were instantiated by the GATK engine // *** If we got here, then we were instantiated by the GATK engine ***
// if we're in POOLED mode, we no longer want the sample names
// (although they're still stored in the gcm if needed)
if ( UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED )
samples.clear();
// create the output writer stream // create the output writer stream
if ( VARIANTS_FILE != null ) if ( VARIANTS_FILE != null )
@ -205,7 +210,8 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeL
callsMetrics.nCalledBases++; callsMetrics.nCalledBases++;
// can't make a confident variant call here // can't make a confident variant call here
if ( value.first == null || value.first.size() == 0 ) { if ( value.first == null ||
(UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED && value.first.size() == 0) ) {
callsMetrics.nNonConfidentCalls++; callsMetrics.nNonConfidentCalls++;
return sum; return sum;
} }
@ -213,7 +219,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeL
callsMetrics.nConfidentCalls++; callsMetrics.nConfidentCalls++;
// if we have a single-sample call (single sample from PointEstimate model returns no genotype locus data) // if we have a single-sample call (single sample from PointEstimate model returns no genotype locus data)
if ( value.second == null || (!writer.supportsMultiSample() && samples.size() == 1) ) { if ( value.second == null || (!writer.supportsMultiSample() && samples.size() <= 1) ) {
writer.addGenotypeCall(value.first.get(0)); writer.addGenotypeCall(value.first.get(0));
} }

View File

@ -0,0 +1,24 @@
package org.broadinstitute.sting.utils.genotype;
/**
* @author ebanks
* Interface AlternateAlleleBacked
*
* this interface indicates that the genotype is
* backed up by alternate allele information.
*/
public interface AlternateAlleleBacked {
/**
*
* @return returns the alternate allele for this genotype
*/
public char getAlternateAllele();
/**
*
* @param alt the alternate allele base for this genotype
*/
public void setAlternateAllele(char alt);
}

View File

@ -13,7 +13,7 @@ import java.util.Map;
* <p/> * <p/>
* represents the meta data for a genotype object. * represents the meta data for a genotype object.
*/ */
public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked, SLODBacked, IDBacked, AlleleFrequencyBacked, ArbitraryFieldsBacked { public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked, SLODBacked, IDBacked, AlternateAlleleBacked, AlleleFrequencyBacked, ArbitraryFieldsBacked {
// the discovery lod score // the discovery lod score
private double mConfidence = 0.0; private double mConfidence = 0.0;
@ -28,8 +28,9 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked
// the location // the location
private GenomeLoc mLoc; private GenomeLoc mLoc;
// the ref base // the ref base and alt bases
private char mRefBase; private char mRefBase;
private char mAltBase = 'N';
// the id // the id
private String mID; private String mID;
@ -65,6 +66,22 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked
return mLoc; return mLoc;
} }
/**
*
* @return returns the alternate allele for this genotype
*/
public char getAlternateAllele() {
return mAltBase;
}
/**
*
* @param alt the alternate allele base for this genotype
*/
public void setAlternateAllele(char alt) {
mAltBase = alt;
}
/** /**
* get the confidence * get the confidence
* *

View File

@ -116,13 +116,18 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
VCFParameters params = new VCFParameters(); VCFParameters params = new VCFParameters();
params.addFormatItem("GT"); params.addFormatItem("GT");
// mapping of our sample names to genotypes
if (genotypes.size() < 1) {
throw new IllegalArgumentException("Unable to parse out the current location: genotype array must contain at least one entry");
}
// get the location and reference // get the location and reference
params.setLocations(genotypes.get(0).getLocation(), genotypes.get(0).getReference()); if ( genotypes.size() == 0 ) {
if ( locusdata == null )
throw new IllegalArgumentException("Unable to parse out the current location: genotype array must contain at least one entry or have locusdata");
params.setLocations(locusdata.getLocation(), locusdata.getReference());
// if there is no genotype data, we'll also need to set an alternate allele
params.addAlternateBase(new VCFGenotypeEncoding(String.valueOf(((VCFGenotypeLocusData)locusdata).getAlternateAllele())));
} else {
params.setLocations(genotypes.get(0).getLocation(), genotypes.get(0).getReference());
}
Map<String, VCFGenotypeCall> genotypeMap = genotypeListToSampleNameMap(genotypes); Map<String, VCFGenotypeCall> genotypeMap = genotypeListToSampleNameMap(genotypes);

View File

@ -33,7 +33,7 @@ class VCFParameters {
} else { } else {
if (!contig.equals(this.contig)) if (!contig.equals(this.contig))
throw new IllegalArgumentException("The contig name has to be the same at a single locus"); throw new IllegalArgumentException("The contig name has to be the same at a single locus");
if (position != this.position) if (location.getStart() != this.position)
throw new IllegalArgumentException("The position has to be the same at a single locus"); throw new IllegalArgumentException("The position has to be the same at a single locus");
if (refBase != this.referenceBase) if (refBase != this.referenceBase)
throw new IllegalArgumentException("The reference base name has to be the same at a single locus"); throw new IllegalArgumentException("The reference base name has to be the same at a single locus");