Merge branch 'master' of gsa2:/humgen/gsa-scr1/chartl/dev/unstable

This commit is contained in:
Chris Hartl 2012-11-28 10:49:16 -05:00
commit f95803ee05
5 changed files with 53 additions and 20 deletions

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers; package org.broadinstitute.sting.gatk.walkers;
import com.google.java.contract.Ensures;
import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broad.tribble.Feature; import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Input;
@ -75,6 +76,7 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
} }
// Determine probability of active status over the AlignmentContext // Determine probability of active status over the AlignmentContext
@Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"})
public abstract ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context); public abstract ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
// Map over the ActiveRegion // Map over the ActiveRegion

View File

@ -283,7 +283,7 @@ public class DiffEngine {
GATKReport report = new GATKReport(); GATKReport report = new GATKReport();
final String tableName = "differences"; final String tableName = "differences";
// TODO for Geraldine -- link needs to be updated below // TODO for Geraldine -- link needs to be updated below
report.addTable(tableName, "Summarized differences between the master and test files. See [ask Geraldine to fix link to DiffEngine wiki] for more information", 3); report.addTable(tableName, "Summarized differences between the master and test files. See http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", 3);
final GATKReportTable table = report.getTable(tableName); final GATKReportTable table = report.getTable(tableName);
table.addColumn("Difference"); table.addColumn("Difference");
table.addColumn("NumberOfOccurrences"); table.addColumn("NumberOfOccurrences");

View File

@ -95,7 +95,8 @@ import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFr
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} ) @DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingStats> { public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingStats> {
private static final boolean DEBUG = false; @Argument(fullName="debug", shortName="debug", doc="If specified, print out very verbose debug information (if -l DEBUG is also specified)", required = false)
protected boolean DEBUG = false;
/** /**
* The VCF file we are phasing variants from. * The VCF file we are phasing variants from.
* *
@ -949,7 +950,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
} }
if (DEBUG) logger.debug("\nPhasing table [AFTER CALCULATION]:\n" + sampleHaps + "\n"); if (DEBUG) logger.debug("\nPhasing table [AFTER CALCULATION]:\n" + sampleHaps + "\n");
MaxHaplotypeAndQuality maxHapQual = new MaxHaplotypeAndQuality(sampleHaps, true); MaxHaplotypeAndQuality maxHapQual = new MaxHaplotypeAndQuality(sampleHaps, DEBUG);
double posteriorProb = maxHapQual.maxEntry.getScore().getValue(); double posteriorProb = maxHapQual.maxEntry.getScore().getValue();
if (DEBUG) if (DEBUG)
@ -971,7 +972,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
public MaxHaplotypeAndQuality(PhasingTable hapTable, boolean printDebug) { public MaxHaplotypeAndQuality(PhasingTable hapTable, boolean printDebug) {
// Marginalize each haplotype to its first 2 positions: // Marginalize each haplotype to its first 2 positions:
hapTable = HaplotypeTableCreator.marginalizeAsNewTable(hapTable); hapTable = HaplotypeTableCreator.marginalizeAsNewTable(hapTable);
if (DEBUG && printDebug) if (printDebug)
logger.debug("\nPhasing table [AFTER MAPPING]:\n" + hapTable + "\n"); logger.debug("\nPhasing table [AFTER MAPPING]:\n" + hapTable + "\n");
calculateMaxHapAndPhasingQuality(hapTable, printDebug); calculateMaxHapAndPhasingQuality(hapTable, printDebug);
@ -981,7 +982,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
private void calculateMaxHapAndPhasingQuality(PhasingTable hapTable, boolean printDebug) { private void calculateMaxHapAndPhasingQuality(PhasingTable hapTable, boolean printDebug) {
hapTable.normalizeScores(); hapTable.normalizeScores();
if (DEBUG && printDebug) if (printDebug)
logger.debug("\nPhasing table [AFTER NORMALIZATION]:\n" + hapTable + "\n"); logger.debug("\nPhasing table [AFTER NORMALIZATION]:\n" + hapTable + "\n");
// Determine the phase at this position: // Determine the phase at this position:

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.traversals; package org.broadinstitute.sting.gatk.traversals;
import com.google.java.contract.PreconditionError;
import net.sf.samtools.*; import net.sf.samtools.*;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.utils.interval.IntervalUtils;
@ -52,6 +53,10 @@ public class TraverseActiveRegionsTest extends BaseTest {
this.prob = 1.0; this.prob = 1.0;
} }
public DummyActiveRegionWalker(double constProb) {
this.prob = constProb;
}
@Override @Override
public ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
isActiveCalls.add(ref.getLocus()); isActiveCalls.add(ref.getLocus());
@ -96,8 +101,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999)); intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999));
intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999)); intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999));
intervals.add(genomeLocParser.createGenomeLoc("1", 10000, 20000)); intervals.add(genomeLocParser.createGenomeLoc("1", 10000, 20000));
// TODO: this fails! intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 10100));
//intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 20000));
intervals = IntervalUtils.sortAndMergeIntervals(genomeLocParser, intervals, IntervalMergingRule.OVERLAPPING_ONLY).toList(); intervals = IntervalUtils.sortAndMergeIntervals(genomeLocParser, intervals, IntervalMergingRule.OVERLAPPING_ONLY).toList();
reads = new ArrayList<SAMRecord>(); reads = new ArrayList<SAMRecord>();
@ -105,12 +109,11 @@ public class TraverseActiveRegionsTest extends BaseTest {
reads.add(buildSAMRecord("overlap_equal", "1", 10, 20)); reads.add(buildSAMRecord("overlap_equal", "1", 10, 20));
reads.add(buildSAMRecord("overlap_unequal", "1", 10, 21)); reads.add(buildSAMRecord("overlap_unequal", "1", 10, 21));
reads.add(buildSAMRecord("boundary_equal", "1", 1990, 2009)); reads.add(buildSAMRecord("boundary_equal", "1", 1990, 2009));
reads.add(buildSAMRecord("boundary_unequal", "1", 1995, 2050)); reads.add(buildSAMRecord("boundary_unequal", "1", 1990, 2008));
reads.add(buildSAMRecord("extended_only", "1", 3000, 3100)); reads.add(buildSAMRecord("extended_only", "1", 3000, 3100));
reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990)); reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990));
reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000)); reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000));
// TODO reads.add(buildSAMRecord("simple20", "20", 10025, 10075));
//reads.add(buildSAMRecord("simple20", "20", 10100, 10150));
} }
@Test @Test
@ -134,6 +137,18 @@ public class TraverseActiveRegionsTest extends BaseTest {
return activeIntervals; return activeIntervals;
} }
@Test (expectedExceptions = PreconditionError.class)
public void testIsActiveRangeLow () {
DummyActiveRegionWalker walker = new DummyActiveRegionWalker(-0.1);
getActiveRegions(walker, intervals).values();
}
@Test (expectedExceptions = PreconditionError.class)
public void testIsActiveRangeHigh () {
DummyActiveRegionWalker walker = new DummyActiveRegionWalker(1.1);
getActiveRegions(walker, intervals).values();
}
@Test @Test
public void testActiveRegionCoverage() { public void testActiveRegionCoverage() {
DummyActiveRegionWalker walker = new DummyActiveRegionWalker(); DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
@ -199,14 +214,12 @@ public class TraverseActiveRegionsTest extends BaseTest {
// simple: Primary in 1:1-999 // simple: Primary in 1:1-999
// overlap_equal: Primary in 1:1-999 // overlap_equal: Primary in 1:1-999
// overlap_unequal: Primary in 1:1-999 // overlap_unequal: Primary in 1:1-999
// boundary_equal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999 // boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
// boundary_unequal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999 // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
// extended_only: Extended in 1:2000-2999 // extended_only: Extended in 1:2000-2999
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999 // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
// outside_intervals: none // outside_intervals: none
// simple20: Primary in 20:10000-10100
// TODO
// simple20: Primary in 20:10000-20000
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals); Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
ActiveRegion region; ActiveRegion region;
@ -221,28 +234,43 @@ public class TraverseActiveRegionsTest extends BaseTest {
verifyReadNotPlaced(region, "extended_only"); verifyReadNotPlaced(region, "extended_only");
// TODO: fail verifyReadNonPrimary(region, "extended_and_np"); // TODO: fail verifyReadNonPrimary(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals"); verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999)); region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
verifyReadNotPlaced(region, "simple"); verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal"); verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal"); verifyReadNotPlaced(region, "overlap_unequal");
// TODO: fail verifyReadPrimary(region, "boundary_equal"); // TODO: fail verifyReadNonPrimary(region, "boundary_equal");
// TODO: fail verifyReadNonPrimary(region, "boundary_unequal"); verifyReadPrimary(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_only"); verifyReadNotPlaced(region, "extended_only");
// TODO: fail verifyReadPrimary(region, "extended_and_np"); // TODO: fail verifyReadPrimary(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals"); verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999)); region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
verifyReadNotPlaced(region, "simple"); verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal"); verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal"); verifyReadNotPlaced(region, "overlap_unequal");
// TODO: fail verifyReadNonPrimary(region, "boundary_equal"); verifyReadPrimary(region, "boundary_equal");
verifyReadPrimary(region, "boundary_unequal"); // TODO: fail verifyReadNonPrimary(region, "boundary_unequal");
// TODO: fail verifyReadExtended(region, "extended_only"); // TODO: fail verifyReadExtended(region, "extended_only");
// TODO: fail verifyReadExtended(region, "extended_and_np"); // TODO: fail verifyReadExtended(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals"); verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
verifyReadNotPlaced(region, "boundary_equal");
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_only");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadPrimary(region, "simple20");
// TODO: more tests and edge cases // TODO: more tests and edge cases
} }
@ -282,6 +310,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
for (LocusShardDataProvider dataProvider : createDataProviders(intervals)) for (LocusShardDataProvider dataProvider : createDataProviders(intervals))
t.traverse(walker, dataProvider, 0); t.traverse(walker, dataProvider, 0);
t.endTraversal(walker, 0);
return walker.mappedActiveRegions; return walker.mappedActiveRegions;
} }

View File

@ -793,7 +793,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
int sampleI = 0; int sampleI = 0;
for ( final List<Allele> alleles : Utils.makePermutations(vc.getAlleles(), 2, true) ) { for ( final List<Allele> alleles : Utils.makePermutations(vc.getAlleles(), 2, true) ) {
genotypes.add(GenotypeBuilder.create("sample" + sampleI, alleles)); genotypes.add(GenotypeBuilder.create("sample" + sampleI++, alleles));
} }
genotypes.add(GenotypeBuilder.createMissing("missing", 2)); genotypes.add(GenotypeBuilder.createMissing("missing", 2));