Merge branch 'master' of github.com:broadinstitute/gsa-unstable

This commit is contained in:
Eric Banks 2012-11-19 10:34:44 -05:00
commit 4f243acaa6
7 changed files with 157 additions and 3 deletions

View File

@ -34,6 +34,9 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
private final LinkedList<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();
private final LinkedHashSet<GATKSAMRecord> myReads = new LinkedHashSet<GATKSAMRecord>();
// package access for unit testing
ActivityProfile profile;
@Override
public String getTraversalUnits() {
return "active regions";
@ -53,7 +56,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
int minStart = Integer.MAX_VALUE;
final List<ActiveRegion> activeRegions = new LinkedList<ActiveRegion>();
ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() );
profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() );
ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView);

View File

@ -277,8 +277,12 @@ public class VariantAnnotatorEngine {
if ( expression.fieldName.equals("ID") ) {
if ( vc.hasID() )
infoAnnotations.put(expression.fullName, vc.getID());
} else if (expression.fieldName.equals("ALT")) {
infoAnnotations.put(expression.fullName, vc.getAlternateAllele(0).getDisplayString());
} else if ( vc.hasAttribute(expression.fieldName) ) {
infoAnnotations.put(expression.fullName, vc.getAttribute(expression.fieldName));
infoAnnotations.put(expression.fullName, vc.getAttribute(expression.fieldName));
}
}
}

View File

@ -190,6 +190,10 @@ public class UnifiedGenotyperEngine {
final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model, perReadAlleleLikelihoodMap);
if ( vc != null )
results.add(calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, true, perReadAlleleLikelihoodMap));
// todo - uncomment if we want to also emit a null ref call (with no QUAL) if there's no evidence for REF and if EMIT_ALL_SITES is set
// else if (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES)
// results.add(generateEmptyContext(tracker, refContext, null, rawContext));
}
}
}

View File

@ -183,6 +183,10 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
@Argument(fullName="keepAC0", shortName="keepAC0", doc="If provided, modules that track polymorphic sites will not require that a site have AC > 0 when the input eval has genotypes", required=false)
private boolean keepSitesWithAC0 = false;
@Hidden
@Argument(fullName="numSamples", shortName="numSamples", doc="If provided, modules that track polymorphic sites will not require that a site have AC > 0 when the input eval has genotypes", required=false)
private int numSamplesFromArgument = 0;
/**
* If true, VariantEval will treat -eval 1 -eval 2 as separate tracks from the same underlying
* variant set, and evaluate the union of the results. Useful when you want to do -eval chr1.vcf -eval chr2.vcf etc.
@ -589,6 +593,14 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
public boolean isSubsettingToSpecificSamples() { return isSubsettingSamples; }
public Set<String> getSampleNamesForEvaluation() { return sampleNamesForEvaluation; }
public int getNumberOfSamplesForEvaluation() {
if (sampleNamesForEvaluation!= null && !sampleNamesForEvaluation.isEmpty())
return sampleNamesForEvaluation.size();
else {
return numSamplesFromArgument;
}
}
public Set<String> getSampleNamesForStratification() { return sampleNamesForStratification; }
public List<RodBinding<VariantContext>> getComps() { return comps; }

View File

@ -29,7 +29,7 @@ public class AlleleCount extends VariantStratifier {
// There are ploidy x n sample chromosomes
// TODO -- generalize to handle multiple ploidy
nchrom = getVariantEvalWalker().getSampleNamesForEvaluation().size() * getVariantEvalWalker().getSamplePloidy();
nchrom = getVariantEvalWalker().getNumberOfSamplesForEvaluation() * getVariantEvalWalker().getSamplePloidy();
if ( nchrom < 2 )
throw new UserException.BadArgumentValue("AlleleCount", "AlleleCount stratification requires an eval vcf with at least one sample");

View File

@ -103,6 +103,11 @@ public class ActivityProfile {
isActiveList.add(result);
}
// for unit testing
public List<ActivityProfileResult> getActiveList() {
return isActiveList;
}
public int size() {
return isActiveList.size();
}

View File

@ -0,0 +1,126 @@
package org.broadinstitute.sting.gatk.traversals;
import org.testng.Assert;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceDictionary;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.reads.MockLocusShard;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.executive.WindowMaker;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.List;
/**
* Created with IntelliJ IDEA.
* User: thibault
* Date: 11/13/12
* Time: 2:47 PM
*/
public class TraverseActiveRegionsTest extends BaseTest {
private class DummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
private final double prob;
public DummyActiveRegionWalker() {
this.prob = 1.0;
}
@Override
public ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
return new ActivityProfileResult(ref.getLocus(), prob);
}
@Override
public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) {
return 0;
}
@Override
public Integer reduceInit() {
return 0;
}
@Override
public Integer reduce(Integer value, Integer sum) {
return 0;
}
}
private final TraverseActiveRegions<Integer, Integer> t = new TraverseActiveRegions<Integer, Integer>();
private IndexedFastaSequenceFile reference;
private GenomeLocParser genomeLocParser;
private ActiveRegionWalker<Integer, Integer> walker;
@BeforeClass
private void init() throws FileNotFoundException {
reference = new CachingIndexedFastaSequenceFile(new File(hg19Reference));
SAMSequenceDictionary dictionary = reference.getSequenceDictionary();
genomeLocParser = new GenomeLocParser(dictionary);
}
@Test
public void testAllIntervalsSeen() throws Exception {
List<GenomeLoc> intervals = new ArrayList<GenomeLoc>();
GenomeLoc interval = genomeLocParser.createGenomeLoc("1", 1, 1);
intervals.add(interval);
LocusShardDataProvider dataProvider = createDataProvider(intervals);
t.traverse(walker, dataProvider, 0);
boolean allGenomeLocsSeen = true;
for (GenomeLoc loc : intervals) {
boolean thisGenomeLocSeen = false;
for (ActivityProfileResult active : t.profile.getActiveList()) {
if (loc.equals(active.getLoc())) {
thisGenomeLocSeen = true;
break;
}
}
if (!thisGenomeLocSeen) {
allGenomeLocsSeen = false;
break;
}
}
Assert.assertTrue(allGenomeLocsSeen, "Some intervals missing from activity profile");
}
private LocusShardDataProvider createDataProvider(List<GenomeLoc> intervals) {
walker = new DummyActiveRegionWalker();
StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(new ArrayList<SAMRecord>());
Shard shard = new MockLocusShard(genomeLocParser, intervals);
WindowMaker windowMaker = new WindowMaker(shard, genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
//engine.setReferenceDataSource(reference);
engine.setGenomeLocParser(genomeLocParser);
t.initialize(engine);
return new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>());
}
}