Moved ApplyVariantClusters over to VariationContext

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3207 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2010-04-20 16:20:25 +00:00
parent cc3061538f
commit 124b7a2a58
3 changed files with 31 additions and 32 deletions

View File

@ -5,7 +5,7 @@ import org.broadinstitute.sting.utils.Utils;
import java.util.*;
/**
* This class emcompasses all the basic information about a genotype. It is immutable.
* This class encompasses all the basic information about a genotype. It is immutable.
*
* @author Mark DePristo
*/

View File

@ -27,11 +27,12 @@ package org.broadinstitute.sting.playground.gatk.walkers.variantoptimizer;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
@ -85,6 +86,7 @@ public class ApplyVariantClustersWalker extends RodWalker<ExpandingArrayList<Var
private VariantGaussianMixtureModel theModel = null;
private VCFWriter vcfWriter;
private Set<String> ignoreInputFilterSet = null;
private final ArrayList<String> ALLOWED_FORMAT_FIELDS = new ArrayList<String>();
//---------------------------------------------------------------------------------------------------------------
@ -111,6 +113,11 @@ public class ApplyVariantClustersWalker extends RodWalker<ExpandingArrayList<Var
throw new StingException( "Variant Optimization Model is unrecognized. Implemented options are GAUSSIAN_MIXTURE_MODEL and K_NEAREST_NEIGHBORS" );
}
ALLOWED_FORMAT_FIELDS.add(VCFGenotypeRecord.GENOTYPE_KEY); // copied from VariantsToVCF
ALLOWED_FORMAT_FIELDS.add(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY);
ALLOWED_FORMAT_FIELDS.add(VCFGenotypeRecord.DEPTH_KEY);
ALLOWED_FORMAT_FIELDS.add(VCFGenotypeRecord.GENOTYPE_POSTERIORS_TRIPLET_KEY);
// setup the header fields
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
@ -145,29 +152,17 @@ public class ApplyVariantClustersWalker extends RodWalker<ExpandingArrayList<Var
return mapList;
}
for( final GATKFeature feature : tracker.getAllRods() ) {
Object rod = feature.getUnderlyingObject();
if( rod != null && rod instanceof RodVCF ) {
final RodVCF rodVCF = ((RodVCF) rod);
//BUGBUG: figure out how to make this use VariantContext to be consistent with other VariantOptimizer walkers
// need to convert vc into VCFRecord to write it out?
if( rodVCF.isSNP() &&
(!rodVCF.isFiltered() || IGNORE_ALL_INPUT_FILTERS || (ignoreInputFilterSet != null && ignoreInputFilterSet.containsAll(Arrays.asList(rodVCF.getFilteringCodes())))) ) {
for( final VariantContext vc : tracker.getAllVariantContexts(ref, null, context.getLocation(), false, false) ) {
final VCFRecord vcf = VariantContextAdaptors.toVCF(vc, ref.getBase(), ALLOWED_FORMAT_FIELDS, false, false);
if( vc != null && vc.isSNP() ) {
if( !vc.isFiltered() || IGNORE_ALL_INPUT_FILTERS || (ignoreInputFilterSet != null && ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
final VariantDatum variantDatum = new VariantDatum();
variantDatum.isTransition = BaseUtils.isTransition((byte)rodVCF.getAlternativeBaseForSNP(), (byte)rodVCF.getReferenceForSNP()); //vc.getSNPSubstitutionType().compareTo(BaseUtils.BaseSubstitutionType.TRANSITION) == 0;
variantDatum.isKnown = !rodVCF.isNovel(); //!vc.getAttribute("ID").equals(".");
int numHet = 0;
int numHom = 0;
for( final VCFGenotypeRecord rec : rodVCF.getVCFGenotypeRecords() ) {
if( rec.isHet() ) { numHet++; }
else if( rec.isHom() ) { numHom++; }
}
variantDatum.isHet = numHet > numHom; //vc.getHetCount() > vc.getHomVarCount(); // BUGBUG: what to do here for multi sample calls?
variantDatum.isTransition = vc.getSNPSubstitutionType().compareTo(BaseUtils.BaseSubstitutionType.TRANSITION) == 0;
variantDatum.isKnown = !vc.getAttribute("ID").equals(".");
variantDatum.isHet = vc.getHetCount() > vc.getHomVarCount(); // BUGBUG: what to do here for multi sample calls?
final double pTrue = theModel.evaluateVariant( rodVCF.getInfoValues(), rodVCF.getQual(), variantDatum.isHet );
final double recalQual = QualityUtils.phredScaleErrorRate( Math.max( 1.0 - pTrue, 0.000000001) );
final double pTrue = theModel.evaluateVariant( vc.getAttributes(), vc.getPhredScaledQual(), variantDatum.isHet );
final double recalQual = QualityUtils.phredScaleErrorRate( Math.max(1.0 - pTrue, 0.000000001) );
if( variantDatum.isKnown && KNOWN_VAR_QUAL_PRIOR > 0.1 ) {
variantDatum.qual = 0.5 * recalQual + 0.5 * KNOWN_VAR_QUAL_PRIOR;
@ -176,14 +171,17 @@ public class ApplyVariantClustersWalker extends RodWalker<ExpandingArrayList<Var
}
mapList.add( variantDatum );
rodVCF.mCurrentRecord.addInfoField("OQ", ((Double)rodVCF.getQual()).toString() );
rodVCF.mCurrentRecord.setQual( variantDatum.qual );
rodVCF.mCurrentRecord.setFilterString(VCFRecord.UNFILTERED);
vcfWriter.addRecord( rodVCF.mCurrentRecord );
vcf.addInfoField("OQ", ((Double)vc.getPhredScaledQual()).toString() );
vcf.setQual( variantDatum.qual );
vcf.setFilterString(VCFRecord.UNFILTERED);
vcfWriter.addRecord( vcf );
} else { // not a SNP or is filtered so just dump it out to the VCF file
vcfWriter.addRecord( rodVCF.mCurrentRecord );
System.out.println(vc.getGenotype("NA12878").getAttributes());
System.out.println(vcf.getGenotype("NA12878").getFields());
System.out.println();
vcfWriter.addRecord( vcf );
}
}

View File

@ -541,19 +541,20 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
}
}
public final double evaluateVariant( final Map<String,String> annotationMap, final double qualityScore, final boolean isHet ) {
public final double evaluateVariant( final Map<String,Object> annotationMap, final double qualityScore, final boolean isHet ) {
final double[] pVarInCluster = new double[numGaussians];
final double[] annotations = new double[dataManager.numAnnotations];
for( int jjj = 0; jjj < dataManager.numAnnotations; jjj++ ) {
double value = 0.0;
if( dataManager.annotationKeys.get(jjj).equals("QUAL") ) {
final String annotationKey = dataManager.annotationKeys.get(jjj);
if( annotationKey.equals("QUAL") ) {
value = qualityScore;
} else {
try {
final String stringValue = annotationMap.get( dataManager.annotationKeys.get(jjj) );
final Object stringValue = annotationMap.get( annotationKey );
if( stringValue != null ) {
value = Double.parseDouble( stringValue );
value = Double.parseDouble( stringValue.toString() );
if( Double.isInfinite(value) ) {
value = ( value > 0 ? 1.0 : -1.0 ) * INFINITE_ANNOTATION_VALUE;
}