Merge branch 'master' of gsa2:/humgen/gsa-scr1/chartl/dev/unstable
This commit is contained in:
commit
f95803ee05
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers;
|
package org.broadinstitute.sting.gatk.walkers;
|
||||||
|
|
||||||
|
import com.google.java.contract.Ensures;
|
||||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
import org.broad.tribble.Feature;
|
import org.broad.tribble.Feature;
|
||||||
import org.broadinstitute.sting.commandline.Input;
|
import org.broadinstitute.sting.commandline.Input;
|
||||||
|
|
@ -75,6 +76,7 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
|
||||||
}
|
}
|
||||||
|
|
||||||
// Determine probability of active status over the AlignmentContext
|
// Determine probability of active status over the AlignmentContext
|
||||||
|
@Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"})
|
||||||
public abstract ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
|
public abstract ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
|
||||||
|
|
||||||
// Map over the ActiveRegion
|
// Map over the ActiveRegion
|
||||||
|
|
|
||||||
|
|
@ -283,7 +283,7 @@ public class DiffEngine {
|
||||||
GATKReport report = new GATKReport();
|
GATKReport report = new GATKReport();
|
||||||
final String tableName = "differences";
|
final String tableName = "differences";
|
||||||
// TODO for Geraldine -- link needs to be updated below
|
// TODO for Geraldine -- link needs to be updated below
|
||||||
report.addTable(tableName, "Summarized differences between the master and test files. See [ask Geraldine to fix link to DiffEngine wiki] for more information", 3);
|
report.addTable(tableName, "Summarized differences between the master and test files. See http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", 3);
|
||||||
final GATKReportTable table = report.getTable(tableName);
|
final GATKReportTable table = report.getTable(tableName);
|
||||||
table.addColumn("Difference");
|
table.addColumn("Difference");
|
||||||
table.addColumn("NumberOfOccurrences");
|
table.addColumn("NumberOfOccurrences");
|
||||||
|
|
|
||||||
|
|
@ -95,7 +95,8 @@ import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFr
|
||||||
|
|
||||||
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
|
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
|
||||||
public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingStats> {
|
public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingStats> {
|
||||||
private static final boolean DEBUG = false;
|
@Argument(fullName="debug", shortName="debug", doc="If specified, print out very verbose debug information (if -l DEBUG is also specified)", required = false)
|
||||||
|
protected boolean DEBUG = false;
|
||||||
/**
|
/**
|
||||||
* The VCF file we are phasing variants from.
|
* The VCF file we are phasing variants from.
|
||||||
*
|
*
|
||||||
|
|
@ -949,7 +950,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
|
||||||
}
|
}
|
||||||
|
|
||||||
if (DEBUG) logger.debug("\nPhasing table [AFTER CALCULATION]:\n" + sampleHaps + "\n");
|
if (DEBUG) logger.debug("\nPhasing table [AFTER CALCULATION]:\n" + sampleHaps + "\n");
|
||||||
MaxHaplotypeAndQuality maxHapQual = new MaxHaplotypeAndQuality(sampleHaps, true);
|
MaxHaplotypeAndQuality maxHapQual = new MaxHaplotypeAndQuality(sampleHaps, DEBUG);
|
||||||
double posteriorProb = maxHapQual.maxEntry.getScore().getValue();
|
double posteriorProb = maxHapQual.maxEntry.getScore().getValue();
|
||||||
|
|
||||||
if (DEBUG)
|
if (DEBUG)
|
||||||
|
|
@ -971,7 +972,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
|
||||||
public MaxHaplotypeAndQuality(PhasingTable hapTable, boolean printDebug) {
|
public MaxHaplotypeAndQuality(PhasingTable hapTable, boolean printDebug) {
|
||||||
// Marginalize each haplotype to its first 2 positions:
|
// Marginalize each haplotype to its first 2 positions:
|
||||||
hapTable = HaplotypeTableCreator.marginalizeAsNewTable(hapTable);
|
hapTable = HaplotypeTableCreator.marginalizeAsNewTable(hapTable);
|
||||||
if (DEBUG && printDebug)
|
if (printDebug)
|
||||||
logger.debug("\nPhasing table [AFTER MAPPING]:\n" + hapTable + "\n");
|
logger.debug("\nPhasing table [AFTER MAPPING]:\n" + hapTable + "\n");
|
||||||
|
|
||||||
calculateMaxHapAndPhasingQuality(hapTable, printDebug);
|
calculateMaxHapAndPhasingQuality(hapTable, printDebug);
|
||||||
|
|
@ -981,7 +982,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
|
||||||
|
|
||||||
private void calculateMaxHapAndPhasingQuality(PhasingTable hapTable, boolean printDebug) {
|
private void calculateMaxHapAndPhasingQuality(PhasingTable hapTable, boolean printDebug) {
|
||||||
hapTable.normalizeScores();
|
hapTable.normalizeScores();
|
||||||
if (DEBUG && printDebug)
|
if (printDebug)
|
||||||
logger.debug("\nPhasing table [AFTER NORMALIZATION]:\n" + hapTable + "\n");
|
logger.debug("\nPhasing table [AFTER NORMALIZATION]:\n" + hapTable + "\n");
|
||||||
|
|
||||||
// Determine the phase at this position:
|
// Determine the phase at this position:
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.gatk.traversals;
|
package org.broadinstitute.sting.gatk.traversals;
|
||||||
|
|
||||||
|
import com.google.java.contract.PreconditionError;
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||||
|
|
@ -52,6 +53,10 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
||||||
this.prob = 1.0;
|
this.prob = 1.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public DummyActiveRegionWalker(double constProb) {
|
||||||
|
this.prob = constProb;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
isActiveCalls.add(ref.getLocus());
|
isActiveCalls.add(ref.getLocus());
|
||||||
|
|
@ -96,8 +101,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
||||||
intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999));
|
intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999));
|
||||||
intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
||||||
intervals.add(genomeLocParser.createGenomeLoc("1", 10000, 20000));
|
intervals.add(genomeLocParser.createGenomeLoc("1", 10000, 20000));
|
||||||
// TODO: this fails!
|
intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 10100));
|
||||||
//intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 20000));
|
|
||||||
intervals = IntervalUtils.sortAndMergeIntervals(genomeLocParser, intervals, IntervalMergingRule.OVERLAPPING_ONLY).toList();
|
intervals = IntervalUtils.sortAndMergeIntervals(genomeLocParser, intervals, IntervalMergingRule.OVERLAPPING_ONLY).toList();
|
||||||
|
|
||||||
reads = new ArrayList<SAMRecord>();
|
reads = new ArrayList<SAMRecord>();
|
||||||
|
|
@ -105,12 +109,11 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
||||||
reads.add(buildSAMRecord("overlap_equal", "1", 10, 20));
|
reads.add(buildSAMRecord("overlap_equal", "1", 10, 20));
|
||||||
reads.add(buildSAMRecord("overlap_unequal", "1", 10, 21));
|
reads.add(buildSAMRecord("overlap_unequal", "1", 10, 21));
|
||||||
reads.add(buildSAMRecord("boundary_equal", "1", 1990, 2009));
|
reads.add(buildSAMRecord("boundary_equal", "1", 1990, 2009));
|
||||||
reads.add(buildSAMRecord("boundary_unequal", "1", 1995, 2050));
|
reads.add(buildSAMRecord("boundary_unequal", "1", 1990, 2008));
|
||||||
reads.add(buildSAMRecord("extended_only", "1", 3000, 3100));
|
reads.add(buildSAMRecord("extended_only", "1", 3000, 3100));
|
||||||
reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990));
|
reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990));
|
||||||
reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000));
|
reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000));
|
||||||
// TODO
|
reads.add(buildSAMRecord("simple20", "20", 10025, 10075));
|
||||||
//reads.add(buildSAMRecord("simple20", "20", 10100, 10150));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -134,6 +137,18 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
||||||
return activeIntervals;
|
return activeIntervals;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test (expectedExceptions = PreconditionError.class)
|
||||||
|
public void testIsActiveRangeLow () {
|
||||||
|
DummyActiveRegionWalker walker = new DummyActiveRegionWalker(-0.1);
|
||||||
|
getActiveRegions(walker, intervals).values();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test (expectedExceptions = PreconditionError.class)
|
||||||
|
public void testIsActiveRangeHigh () {
|
||||||
|
DummyActiveRegionWalker walker = new DummyActiveRegionWalker(1.1);
|
||||||
|
getActiveRegions(walker, intervals).values();
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testActiveRegionCoverage() {
|
public void testActiveRegionCoverage() {
|
||||||
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
|
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
|
||||||
|
|
@ -199,14 +214,12 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
||||||
// simple: Primary in 1:1-999
|
// simple: Primary in 1:1-999
|
||||||
// overlap_equal: Primary in 1:1-999
|
// overlap_equal: Primary in 1:1-999
|
||||||
// overlap_unequal: Primary in 1:1-999
|
// overlap_unequal: Primary in 1:1-999
|
||||||
// boundary_equal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
|
// boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
|
||||||
// boundary_unequal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
|
// boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
|
||||||
// extended_only: Extended in 1:2000-2999
|
// extended_only: Extended in 1:2000-2999
|
||||||
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
|
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
|
||||||
// outside_intervals: none
|
// outside_intervals: none
|
||||||
|
// simple20: Primary in 20:10000-10100
|
||||||
// TODO
|
|
||||||
// simple20: Primary in 20:10000-20000
|
|
||||||
|
|
||||||
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
|
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
|
||||||
ActiveRegion region;
|
ActiveRegion region;
|
||||||
|
|
@ -221,28 +234,43 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
||||||
verifyReadNotPlaced(region, "extended_only");
|
verifyReadNotPlaced(region, "extended_only");
|
||||||
// TODO: fail verifyReadNonPrimary(region, "extended_and_np");
|
// TODO: fail verifyReadNonPrimary(region, "extended_and_np");
|
||||||
verifyReadNotPlaced(region, "outside_intervals");
|
verifyReadNotPlaced(region, "outside_intervals");
|
||||||
|
verifyReadNotPlaced(region, "simple20");
|
||||||
|
|
||||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
|
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
|
||||||
|
|
||||||
verifyReadNotPlaced(region, "simple");
|
verifyReadNotPlaced(region, "simple");
|
||||||
verifyReadNotPlaced(region, "overlap_equal");
|
verifyReadNotPlaced(region, "overlap_equal");
|
||||||
verifyReadNotPlaced(region, "overlap_unequal");
|
verifyReadNotPlaced(region, "overlap_unequal");
|
||||||
// TODO: fail verifyReadPrimary(region, "boundary_equal");
|
// TODO: fail verifyReadNonPrimary(region, "boundary_equal");
|
||||||
// TODO: fail verifyReadNonPrimary(region, "boundary_unequal");
|
verifyReadPrimary(region, "boundary_unequal");
|
||||||
verifyReadNotPlaced(region, "extended_only");
|
verifyReadNotPlaced(region, "extended_only");
|
||||||
// TODO: fail verifyReadPrimary(region, "extended_and_np");
|
// TODO: fail verifyReadPrimary(region, "extended_and_np");
|
||||||
verifyReadNotPlaced(region, "outside_intervals");
|
verifyReadNotPlaced(region, "outside_intervals");
|
||||||
|
verifyReadNotPlaced(region, "simple20");
|
||||||
|
|
||||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
||||||
|
|
||||||
verifyReadNotPlaced(region, "simple");
|
verifyReadNotPlaced(region, "simple");
|
||||||
verifyReadNotPlaced(region, "overlap_equal");
|
verifyReadNotPlaced(region, "overlap_equal");
|
||||||
verifyReadNotPlaced(region, "overlap_unequal");
|
verifyReadNotPlaced(region, "overlap_unequal");
|
||||||
// TODO: fail verifyReadNonPrimary(region, "boundary_equal");
|
verifyReadPrimary(region, "boundary_equal");
|
||||||
verifyReadPrimary(region, "boundary_unequal");
|
// TODO: fail verifyReadNonPrimary(region, "boundary_unequal");
|
||||||
// TODO: fail verifyReadExtended(region, "extended_only");
|
// TODO: fail verifyReadExtended(region, "extended_only");
|
||||||
// TODO: fail verifyReadExtended(region, "extended_and_np");
|
// TODO: fail verifyReadExtended(region, "extended_and_np");
|
||||||
verifyReadNotPlaced(region, "outside_intervals");
|
verifyReadNotPlaced(region, "outside_intervals");
|
||||||
|
verifyReadNotPlaced(region, "simple20");
|
||||||
|
|
||||||
|
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
|
||||||
|
|
||||||
|
verifyReadNotPlaced(region, "simple");
|
||||||
|
verifyReadNotPlaced(region, "overlap_equal");
|
||||||
|
verifyReadNotPlaced(region, "overlap_unequal");
|
||||||
|
verifyReadNotPlaced(region, "boundary_equal");
|
||||||
|
verifyReadNotPlaced(region, "boundary_unequal");
|
||||||
|
verifyReadNotPlaced(region, "extended_only");
|
||||||
|
verifyReadNotPlaced(region, "extended_and_np");
|
||||||
|
verifyReadNotPlaced(region, "outside_intervals");
|
||||||
|
verifyReadPrimary(region, "simple20");
|
||||||
|
|
||||||
// TODO: more tests and edge cases
|
// TODO: more tests and edge cases
|
||||||
}
|
}
|
||||||
|
|
@ -282,6 +310,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
||||||
for (LocusShardDataProvider dataProvider : createDataProviders(intervals))
|
for (LocusShardDataProvider dataProvider : createDataProviders(intervals))
|
||||||
t.traverse(walker, dataProvider, 0);
|
t.traverse(walker, dataProvider, 0);
|
||||||
|
|
||||||
|
t.endTraversal(walker, 0);
|
||||||
|
|
||||||
return walker.mappedActiveRegions;
|
return walker.mappedActiveRegions;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -793,7 +793,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
|
||||||
|
|
||||||
int sampleI = 0;
|
int sampleI = 0;
|
||||||
for ( final List<Allele> alleles : Utils.makePermutations(vc.getAlleles(), 2, true) ) {
|
for ( final List<Allele> alleles : Utils.makePermutations(vc.getAlleles(), 2, true) ) {
|
||||||
genotypes.add(GenotypeBuilder.create("sample" + sampleI, alleles));
|
genotypes.add(GenotypeBuilder.create("sample" + sampleI++, alleles));
|
||||||
}
|
}
|
||||||
genotypes.add(GenotypeBuilder.createMissing("missing", 2));
|
genotypes.add(GenotypeBuilder.createMissing("missing", 2));
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue