Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
d405ec2a0d
|
|
@ -92,7 +92,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
|
||||
// Call the walkers isActive function for this locus and add them to the list to be integrated later
|
||||
if( initialIntervals.overlaps(location) ) {
|
||||
final boolean isActive = walker.isActive( tracker, refContext, locus );
|
||||
final boolean isActive = ( walker.presetActiveRegions == null ? walker.isActive( tracker, refContext, locus ) : walker.presetActiveRegions.overlaps(location) );
|
||||
isActiveList.add( new ActiveRegion(location, isActive, engine.getGenomeLocParser(), activeRegionExtension ) );
|
||||
}
|
||||
|
||||
|
|
@ -109,7 +109,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
if( !locusView.hasNext() ) {
|
||||
// Call the walkers isActive function for this locus and add them to the list to be integrated later
|
||||
if( initialIntervals.overlaps(location) ) {
|
||||
final boolean isActive = walker.isActive( tracker, refContext, locus );
|
||||
final boolean isActive = ( walker.presetActiveRegions == null ? walker.isActive( tracker, refContext, locus ) : walker.presetActiveRegions.overlaps(location) );
|
||||
isActiveList.add( new ActiveRegion(location, isActive, engine.getGenomeLocParser(), activeRegionExtension ) );
|
||||
}
|
||||
|
||||
|
|
@ -128,7 +128,16 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
// add these blocks of work to the work queue
|
||||
final ArrayList<ActiveRegion> activeRegions = integrateActiveList( isActiveList );
|
||||
logger.debug("Integrated " + isActiveList.size() + " isActive calls into " + activeRegions.size() + " regions." );
|
||||
workQueue.addAll( activeRegions );
|
||||
if( walker.activeRegionOutStream == null ) {
|
||||
workQueue.addAll( activeRegions );
|
||||
} else { // Just want to output the active regions to a file, not actually process them
|
||||
for( final ActiveRegion activeRegion : activeRegions ) {
|
||||
if( activeRegion.isActive ) {
|
||||
walker.activeRegionOutStream.println( activeRegion.getLocation() );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Since we've sufficiently past this point (or this contig!) in the workQueue we can unload those regions and process them
|
||||
if( !workQueue.isEmpty() ) {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,11 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.IntervalBinding;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
|
||||
|
|
@ -14,8 +19,10 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
|||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
|
@ -32,6 +39,31 @@ import java.util.List;
|
|||
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class})
|
||||
public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||
|
||||
@Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false)
|
||||
public PrintStream activeRegionOutStream = null;
|
||||
|
||||
@Input(fullName="activeRegionIn", shortName="AR", doc="Use this interval list file as the active regions to process", required = false)
|
||||
protected List<IntervalBinding<Feature>> activeRegionBindings = null;
|
||||
|
||||
public GenomeLocSortedSet presetActiveRegions = null;
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
if( activeRegionBindings == null ) { return; }
|
||||
List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>(0);
|
||||
for ( IntervalBinding intervalBinding : activeRegionBindings ) {
|
||||
List<GenomeLoc> intervals = intervalBinding.getIntervals(this.getToolkit());
|
||||
|
||||
if ( intervals.isEmpty() ) {
|
||||
logger.warn("The interval file " + intervalBinding.getSource() + " contains no intervals that could be parsed.");
|
||||
}
|
||||
|
||||
allIntervals = IntervalUtils.mergeListsBySetOperator(intervals, allIntervals, IntervalSetRule.UNION);
|
||||
}
|
||||
|
||||
presetActiveRegions = IntervalUtils.sortAndMergeIntervals(this.getToolkit().getGenomeLocParser(), allIntervals, IntervalMergingRule.ALL);
|
||||
}
|
||||
|
||||
// Do we actually want to operate on the context?
|
||||
public boolean filter(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
|
||||
return true; // We are keeping all the reads
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.samples.SampleDB;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
|
||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
@ -23,7 +24,7 @@ import java.util.*;
|
|||
* Time: 12:24 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation {
|
||||
|
||||
private MendelianViolation mendelianViolation = null;
|
||||
private String motherId;
|
||||
|
|
|
|||
|
|
@ -32,7 +32,6 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.samples.SampleDB;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
|
@ -84,7 +83,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
|
||||
@ArgumentCollection
|
||||
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
||||
public RodBinding<VariantContext> getVariantRodBinding() { return variantCollection.variants; }
|
||||
|
||||
/**
|
||||
* The INFO field will be annotated with information on the most biologically-significant effect
|
||||
|
|
@ -163,6 +161,13 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
@Argument(fullName="list", shortName="ls", doc="List the available annotations and exit")
|
||||
protected Boolean LIST = false;
|
||||
|
||||
/**
|
||||
* By default, the dbSNP ID is added only when the ID field in the variant VCF is empty.
|
||||
*/
|
||||
@Argument(fullName="alwaysAppendDbsnpId", shortName="alwaysAppendDbsnpId", doc="In conjunction with the dbSNP binding, append the dbSNP ID even when the variant VCF already has the ID field populated")
|
||||
protected Boolean ALWAYS_APPEND_DBSNP_ID = false;
|
||||
public boolean alwaysAppendDbsnpId() { return ALWAYS_APPEND_DBSNP_ID; }
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
|
||||
protected boolean indelsOnly = false;
|
||||
|
|
|
|||
|
|
@ -195,11 +195,20 @@ public class VariantAnnotatorEngine {
|
|||
private VariantContext annotateDBs(RefMetaDataTracker tracker, ReferenceContext ref, VariantContext vc, Map<String, Object> infoAnnotations) {
|
||||
for ( Map.Entry<RodBinding<VariantContext>, String> dbSet : dbAnnotations.entrySet() ) {
|
||||
if ( dbSet.getValue().equals(VCFConstants.DBSNP_KEY) ) {
|
||||
String rsID = VCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbSet.getKey(), ref.getLocus()), vc.getType());
|
||||
final String rsID = VCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbSet.getKey(), ref.getLocus()), vc.getType());
|
||||
|
||||
// put the DB key into the INFO field
|
||||
infoAnnotations.put(VCFConstants.DBSNP_KEY, rsID != null);
|
||||
// annotate dbsnp id if available and not already there
|
||||
if ( rsID != null && vc.emptyID() )
|
||||
vc = new VariantContextBuilder(vc).id(rsID).make();
|
||||
|
||||
// add the ID if appropriate
|
||||
if ( rsID != null ) {
|
||||
if ( vc.emptyID() ) {
|
||||
vc = new VariantContextBuilder(vc).id(rsID).make();
|
||||
} else if ( walker.alwaysAppendDbsnpId() && vc.getID().indexOf(rsID) == -1 ) {
|
||||
final String newRsID = vc.getID() + VCFConstants.ID_FIELD_SEPARATOR + rsID;
|
||||
vc = new VariantContextBuilder(vc).id(newRsID).make();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
boolean overlapsComp = false;
|
||||
for ( VariantContext comp : tracker.getValues(dbSet.getKey(), ref.getLocus()) ) {
|
||||
|
|
|
|||
|
|
@ -8,9 +8,9 @@ import java.util.List;
|
|||
public interface AnnotatorCompatibleWalker {
|
||||
|
||||
// getter methods for various used bindings
|
||||
public abstract RodBinding<VariantContext> getVariantRodBinding();
|
||||
public abstract RodBinding<VariantContext> getSnpEffRodBinding();
|
||||
public abstract RodBinding<VariantContext> getDbsnpRodBinding();
|
||||
public abstract List<RodBinding<VariantContext>> getCompRodBindings();
|
||||
public abstract List<RodBinding<VariantContext>> getResourceRodBindings();
|
||||
public abstract boolean alwaysAppendDbsnpId();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -126,10 +126,10 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
@ArgumentCollection
|
||||
protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
|
||||
public RodBinding<VariantContext> getDbsnpRodBinding() { return dbsnp.dbsnp; }
|
||||
public RodBinding<VariantContext> getVariantRodBinding() { return null; }
|
||||
public RodBinding<VariantContext> getSnpEffRodBinding() { return null; }
|
||||
public List<RodBinding<VariantContext>> getCompRodBindings() { return Collections.emptyList(); }
|
||||
public List<RodBinding<VariantContext>> getResourceRodBindings() { return Collections.emptyList(); }
|
||||
public boolean alwaysAppendDbsnpId() { return false; }
|
||||
|
||||
/**
|
||||
* A raw, unfiltered, highly specific callset in VCF format.
|
||||
|
|
@ -205,6 +205,12 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
*
|
||||
**/
|
||||
public void initialize() {
|
||||
// warn the user for misusing EMIT_ALL_SITES
|
||||
if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES &&
|
||||
UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY &&
|
||||
UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.SNP )
|
||||
logger.warn("WARNING: note that the EMIT_ALL_SITES option is intended only for point mutations (SNPs) in DISCOVERY mode or generally when running in GENOTYPE_GIVEN_ALLELES mode; it will by no means produce a comprehensive set of indels in DISCOVERY mode");
|
||||
|
||||
// get all of the unique sample names
|
||||
Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
|
||||
|
||||
|
|
|
|||
|
|
@ -54,8 +54,9 @@ public class UnifiedGenotyperEngine {
|
|||
EMIT_VARIANTS_ONLY,
|
||||
/** produces calls at variant sites and confident reference sites */
|
||||
EMIT_ALL_CONFIDENT_SITES,
|
||||
/** produces calls at any callable site regardless of confidence; this argument is intended for point
|
||||
* mutations (SNPs) only and while some indel calls may be produced they are by no means comprehensive */
|
||||
/** produces calls at any callable site regardless of confidence; this argument is intended only for point
|
||||
* mutations (SNPs) in DISCOVERY mode or generally when running in GENOTYPE_GIVEN_ALLELES mode; it will by
|
||||
* no means produce a comprehensive set of indels in DISCOVERY mode */
|
||||
EMIT_ALL_SITES
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
|
@ -149,7 +150,11 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
|||
count = Integer.valueOf(numberStr);
|
||||
|
||||
}
|
||||
type = VCFHeaderLineType.valueOf(mapping.get("Type"));
|
||||
try {
|
||||
type = VCFHeaderLineType.valueOf(mapping.get("Type"));
|
||||
} catch (Exception e) {
|
||||
throw new TribbleException(mapping.get("Type") + " is not a valid type in the VCF specification (note that types are case-sensitive)");
|
||||
}
|
||||
if (type == VCFHeaderLineType.Flag && !allowFlagValues())
|
||||
throw new IllegalArgumentException("Flag is an unsupported type for this kind of field");
|
||||
|
||||
|
|
|
|||
|
|
@ -110,6 +110,14 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
executeTest("getting DB tag with dbSNP", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleIdsWithDbsnp() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --alwaysAppendDbsnpId --dbsnp " + b36dbSNP129 + " -G Standard --variant " + validationDataLocation + "vcfexample3withIDs.vcf -L " + validationDataLocation + "vcfexample3withIDs.vcf", 1,
|
||||
Arrays.asList("cd7e3d43b8f5579c461b3e588a295fa8"));
|
||||
executeTest("adding multiple IDs with dbSNP", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDBTagWithHapMap() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
|
|
|
|||
Loading…
Reference in New Issue