Connected UG to the new comp track annotation system in VA. Also, when emit confidence is lower than call confidence (so that we emit records filtered with LowQual), add a corresponding FILTER header field to the VCF so that the validator doesn't complain.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3720 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-07-05 13:04:24 +00:00
parent 3347d1ca7c
commit 36edc60ccc
5 changed files with 19 additions and 25 deletions

View File

@ -155,7 +155,7 @@ public class VariantAnnotatorEngine {
private void initialize(GenomeAnalysisEngine engine) {
// check to see whether a dbsnp rod was included
// check to see whether comp rods were included
List<ReferenceOrderedDataSource> dataSources = engine.getRodDataSources();
for ( ReferenceOrderedDataSource source : dataSources ) {
if ( source.getName().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) {

View File

@ -44,7 +44,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
protected JointEstimateGenotypeCalculationModel() {
filter.add("LowQual");
filter.add(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME);
}
public VariantCallContext callExtendedLocus(RefMetaDataTracker tracker, byte[] ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> stratifiedContexts) {

View File

@ -29,6 +29,7 @@ import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.utils.*;
@ -140,18 +141,19 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions());
// annotation (INFO) fields from UnifiedGenotyper
if ( UG_engine.annotateDbsnp )
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.DBSNP_KEY, 1, VCFHeaderLineType.Integer, "dbSNP Membership"));
if ( UG_engine.annotateHapmap2 )
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP2_KEY, 1, VCFHeaderLineType.Integer, "HapMap2 Membership"));
if ( UG_engine.annotateHapmap3 )
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP3_KEY, 1, VCFHeaderLineType.Integer, "HapMap3 Membership"));
for ( Map.Entry<String, String> dbSet : UG_engine.dbAnnotations.entrySet() )
headerInfo.add(new VCFInfoHeaderLine(dbSet.getValue(), 0, VCFHeaderLineType.Flag, (dbSet.getKey().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ? "dbSNP" : dbSet.getValue()) + " Membership"));
if ( !UAC.NO_SLOD )
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.STRAND_BIAS_KEY, 1, VCFHeaderLineType.Float, "Strand Bias"));
// FORMAT and INFO fields
headerInfo.addAll(VCFGenotypeRecord.getSupportedHeaderStrings(VCFHeaderVersion.VCF3_3));
// FILTER fields
if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING ||
UAC.TRIGGER_CONFIDENCE_FOR_EMITTING < UAC.TRIGGER_CONFIDENCE_FOR_CALLING )
headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME, "Low quality"));
// all of the arguments from the argument collection
Set<Object> args = new HashSet<Object>();
args.add(UAC);

View File

@ -33,7 +33,6 @@ import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
@ -46,6 +45,7 @@ import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.glf.GLFGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriter;
import org.broadinstitute.sting.utils.pileup.*;
import org.broad.tribble.vcf.VCFRecord;
import java.io.PrintStream;
import java.util.*;
@ -54,13 +54,9 @@ import java.util.*;
public class UnifiedGenotyperEngine {
public static final String TRIGGER_TRACK_NAME = "trigger";
public static final String LOW_QUAL_FILTER_NAME = "LowQual";
// should we annotate dbsnp?
protected boolean annotateDbsnp = false;
// should we annotate hapmap2?
protected boolean annotateHapmap2 = false;
// should we annotate hapmap3?
protected boolean annotateHapmap3 = false;
protected HashMap<String, String> dbAnnotations = new HashMap<String, String>();
// the unified argument collection
protected UnifiedArgumentCollection UAC = null;
@ -112,18 +108,14 @@ public class UnifiedGenotyperEngine {
else
this.samples = SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader());
// check to see whether a dbsnp rod was included
// check to see whether comp rods were included
List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources();
for ( ReferenceOrderedDataSource source : dataSources ) {
RMDTrack rod = source.getReferenceOrderedData();
if ( rod.getName().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) {
this.annotateDbsnp = true;
if ( source.getName().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) {
dbAnnotations.put(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, VCFRecord.DBSNP_KEY);
}
if ( rod.getName().equals("hapmap2") ) {
this.annotateHapmap2 = true;
}
if ( rod.getName().equals("hapmap3") ) {
this.annotateHapmap3 = true;
else if ( source.getName().startsWith(VariantAnnotatorEngine.dbPrefix) ) {
dbAnnotations.put(source.getName(), source.getName().substring(VariantAnnotatorEngine.dbPrefix.length()));
}
}
}

View File

@ -108,7 +108,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
Arrays.asList("226ca8079db4e46a61367db49bac8b2b"));
Arrays.asList("a38ccaef73e57bed1e5f797b91e7ef38"));
executeTest("testConfidence2", spec2);
}