Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Guillermo del Angel 2012-01-25 15:53:32 -05:00
commit d405ec2a0d
10 changed files with 91 additions and 15 deletions

View File

@ -92,7 +92,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
// Call the walkers isActive function for this locus and add them to the list to be integrated later
if( initialIntervals.overlaps(location) ) {
final boolean isActive = walker.isActive( tracker, refContext, locus );
final boolean isActive = ( walker.presetActiveRegions == null ? walker.isActive( tracker, refContext, locus ) : walker.presetActiveRegions.overlaps(location) );
isActiveList.add( new ActiveRegion(location, isActive, engine.getGenomeLocParser(), activeRegionExtension ) );
}
@ -109,7 +109,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
if( !locusView.hasNext() ) {
// Call the walkers isActive function for this locus and add them to the list to be integrated later
if( initialIntervals.overlaps(location) ) {
final boolean isActive = walker.isActive( tracker, refContext, locus );
final boolean isActive = ( walker.presetActiveRegions == null ? walker.isActive( tracker, refContext, locus ) : walker.presetActiveRegions.overlaps(location) );
isActiveList.add( new ActiveRegion(location, isActive, engine.getGenomeLocParser(), activeRegionExtension ) );
}
@ -128,7 +128,16 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
// add these blocks of work to the work queue
final ArrayList<ActiveRegion> activeRegions = integrateActiveList( isActiveList );
logger.debug("Integrated " + isActiveList.size() + " isActive calls into " + activeRegions.size() + " regions." );
workQueue.addAll( activeRegions );
if( walker.activeRegionOutStream == null ) {
workQueue.addAll( activeRegions );
} else { // Just want to output the active regions to a file, not actually process them
for( final ActiveRegion activeRegion : activeRegions ) {
if( activeRegion.isActive ) {
walker.activeRegionOutStream.println( activeRegion.getLocation() );
}
}
}
// Since we've sufficiently past this point (or this contig!) in the workQueue we can unload those regions and process them
if( !workQueue.isEmpty() ) {

View File

@ -1,6 +1,11 @@
package org.broadinstitute.sting.gatk.walkers;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.IntervalBinding;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
@ -14,8 +19,10 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
@ -32,6 +39,31 @@ import java.util.List;
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class})
public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
@Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false)
public PrintStream activeRegionOutStream = null;
@Input(fullName="activeRegionIn", shortName="AR", doc="Use this interval list file as the active regions to process", required = false)
protected List<IntervalBinding<Feature>> activeRegionBindings = null;
public GenomeLocSortedSet presetActiveRegions = null;
@Override
public void initialize() {
if( activeRegionBindings == null ) { return; }
List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>(0);
for ( IntervalBinding intervalBinding : activeRegionBindings ) {
List<GenomeLoc> intervals = intervalBinding.getIntervals(this.getToolkit());
if ( intervals.isEmpty() ) {
logger.warn("The interval file " + intervalBinding.getSource() + " contains no intervals that could be parsed.");
}
allIntervals = IntervalUtils.mergeListsBySetOperator(intervals, allIntervals, IntervalSetRule.UNION);
}
presetActiveRegions = IntervalUtils.sortAndMergeIntervals(this.getToolkit().getGenomeLocParser(), allIntervals, IntervalMergingRule.ALL);
}
// Do we actually want to operate on the context?
public boolean filter(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
return true; // We are keeping all the reads

View File

@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.samples.SampleDB;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
@ -23,7 +24,7 @@ import java.util.*;
* Time: 12:24 PM
* To change this template use File | Settings | File Templates.
*/
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation {
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation {
private MendelianViolation mendelianViolation = null;
private String motherId;

View File

@ -32,7 +32,6 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.samples.SampleDB;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.utils.BaseUtils;
@ -84,7 +83,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
@ArgumentCollection
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
public RodBinding<VariantContext> getVariantRodBinding() { return variantCollection.variants; }
/**
* The INFO field will be annotated with information on the most biologically-significant effect
@ -163,6 +161,13 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
@Argument(fullName="list", shortName="ls", doc="List the available annotations and exit")
protected Boolean LIST = false;
/**
* By default, the dbSNP ID is added only when the ID field in the variant VCF is empty.
*/
@Argument(fullName="alwaysAppendDbsnpId", shortName="alwaysAppendDbsnpId", doc="In conjunction with the dbSNP binding, append the dbSNP ID even when the variant VCF already has the ID field populated")
protected Boolean ALWAYS_APPEND_DBSNP_ID = false;
public boolean alwaysAppendDbsnpId() { return ALWAYS_APPEND_DBSNP_ID; }
@Hidden
@Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
protected boolean indelsOnly = false;

View File

@ -195,11 +195,20 @@ public class VariantAnnotatorEngine {
private VariantContext annotateDBs(RefMetaDataTracker tracker, ReferenceContext ref, VariantContext vc, Map<String, Object> infoAnnotations) {
for ( Map.Entry<RodBinding<VariantContext>, String> dbSet : dbAnnotations.entrySet() ) {
if ( dbSet.getValue().equals(VCFConstants.DBSNP_KEY) ) {
String rsID = VCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbSet.getKey(), ref.getLocus()), vc.getType());
final String rsID = VCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbSet.getKey(), ref.getLocus()), vc.getType());
// put the DB key into the INFO field
infoAnnotations.put(VCFConstants.DBSNP_KEY, rsID != null);
// annotate dbsnp id if available and not already there
if ( rsID != null && vc.emptyID() )
vc = new VariantContextBuilder(vc).id(rsID).make();
// add the ID if appropriate
if ( rsID != null ) {
if ( vc.emptyID() ) {
vc = new VariantContextBuilder(vc).id(rsID).make();
} else if ( walker.alwaysAppendDbsnpId() && vc.getID().indexOf(rsID) == -1 ) {
final String newRsID = vc.getID() + VCFConstants.ID_FIELD_SEPARATOR + rsID;
vc = new VariantContextBuilder(vc).id(newRsID).make();
}
}
} else {
boolean overlapsComp = false;
for ( VariantContext comp : tracker.getValues(dbSet.getKey(), ref.getLocus()) ) {

View File

@ -8,9 +8,9 @@ import java.util.List;
public interface AnnotatorCompatibleWalker {
// getter methods for various used bindings
public abstract RodBinding<VariantContext> getVariantRodBinding();
public abstract RodBinding<VariantContext> getSnpEffRodBinding();
public abstract RodBinding<VariantContext> getDbsnpRodBinding();
public abstract List<RodBinding<VariantContext>> getCompRodBindings();
public abstract List<RodBinding<VariantContext>> getResourceRodBindings();
public abstract boolean alwaysAppendDbsnpId();
}

View File

@ -126,10 +126,10 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
@ArgumentCollection
protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
public RodBinding<VariantContext> getDbsnpRodBinding() { return dbsnp.dbsnp; }
public RodBinding<VariantContext> getVariantRodBinding() { return null; }
public RodBinding<VariantContext> getSnpEffRodBinding() { return null; }
public List<RodBinding<VariantContext>> getCompRodBindings() { return Collections.emptyList(); }
public List<RodBinding<VariantContext>> getResourceRodBindings() { return Collections.emptyList(); }
public boolean alwaysAppendDbsnpId() { return false; }
/**
* A raw, unfiltered, highly specific callset in VCF format.
@ -205,6 +205,12 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
*
**/
public void initialize() {
// warn the user for misusing EMIT_ALL_SITES
if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES &&
UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY &&
UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.SNP )
logger.warn("WARNING: note that the EMIT_ALL_SITES option is intended only for point mutations (SNPs) in DISCOVERY mode or generally when running in GENOTYPE_GIVEN_ALLELES mode; it will by no means produce a comprehensive set of indels in DISCOVERY mode");
// get all of the unique sample names
Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());

View File

@ -54,8 +54,9 @@ public class UnifiedGenotyperEngine {
EMIT_VARIANTS_ONLY,
/** produces calls at variant sites and confident reference sites */
EMIT_ALL_CONFIDENT_SITES,
/** produces calls at any callable site regardless of confidence; this argument is intended for point
* mutations (SNPs) only and while some indel calls may be produced they are by no means comprehensive */
/** produces calls at any callable site regardless of confidence; this argument is intended only for point
* mutations (SNPs) in DISCOVERY mode or generally when running in GENOTYPE_GIVEN_ALLELES mode; it will by
* no means produce a comprehensive set of indels in DISCOVERY mode */
EMIT_ALL_SITES
}

View File

@ -24,6 +24,7 @@
package org.broadinstitute.sting.utils.codecs.vcf;
import org.broad.tribble.TribbleException;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.Arrays;
@ -149,7 +150,11 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
count = Integer.valueOf(numberStr);
}
type = VCFHeaderLineType.valueOf(mapping.get("Type"));
try {
type = VCFHeaderLineType.valueOf(mapping.get("Type"));
} catch (Exception e) {
throw new TribbleException(mapping.get("Type") + " is not a valid type in the VCF specification (note that types are case-sensitive)");
}
if (type == VCFHeaderLineType.Flag && !allowFlagValues())
throw new IllegalArgumentException("Flag is an unsupported type for this kind of field");

View File

@ -110,6 +110,14 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
executeTest("getting DB tag with dbSNP", spec);
}
@Test
public void testMultipleIdsWithDbsnp() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --alwaysAppendDbsnpId --dbsnp " + b36dbSNP129 + " -G Standard --variant " + validationDataLocation + "vcfexample3withIDs.vcf -L " + validationDataLocation + "vcfexample3withIDs.vcf", 1,
Arrays.asList("cd7e3d43b8f5579c461b3e588a295fa8"));
executeTest("adding multiple IDs with dbSNP", spec);
}
@Test
public void testDBTagWithHapMap() {
WalkerTestSpec spec = new WalkerTestSpec(