Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Matt Hanna 2011-10-07 11:20:00 -04:00
commit 6fbd41724a
4 changed files with 141 additions and 32 deletions

View File

@ -138,7 +138,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
* Any number of VCF files representing known indels to be used for constructing alternate consenses.
* Could be e.g. dbSNP and/or official 1000 Genomes indel calls. Non-indel variants in these files will be ignored.
*/
@Input(fullName="known", shortName = "known", doc="Input VCF file(s) with known indels", required=false)
@Input(fullName="knownAlleles", shortName = "known", doc="Input VCF file(s) with known indels", required=false)
public List<RodBinding<VariantContext>> known = Collections.emptyList();
/**

View File

@ -50,6 +50,7 @@ import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.TreeSet;
/**
* Emits intervals for the Local Indel Realigner to target for realignment.
@ -103,7 +104,7 @@ import java.util.List;
@Allows(value={DataSource.READS, DataSource.REFERENCE})
@By(DataSource.REFERENCE)
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Event, RealignerTargetCreator.Event> {
public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Event, RealignerTargetCreator.EventPair> implements TreeReducible<RealignerTargetCreator.EventPair> {
/**
* The target intervals for realignment.
@ -251,43 +252,125 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
return new Event(eventLoc, furthestStopPos, eventType);
}
public void onTraversalDone(Event sum) {
if ( sum != null && sum.isReportableEvent() )
out.println(sum.toString());
public void onTraversalDone(EventPair sum) {
if ( sum.left != null && sum.left.isReportableEvent() )
sum.intervals.add(sum.left.getLoc());
if ( sum.right != null && sum.right.isReportableEvent() )
sum.intervals.add(sum.right.getLoc());
for ( GenomeLoc loc : sum.intervals )
out.println(loc);
}
public Event reduceInit() {
return null;
public EventPair reduceInit() {
return new EventPair(null, null);
}
public Event reduce(Event value, Event sum) {
// ignore no new events
if ( value == null )
return sum;
public EventPair treeReduce(EventPair lhs, EventPair rhs) {
EventPair result;
// if it's the first good value, use it
if ( sum == null )
return value;
if ( lhs.left == null ) {
result = rhs;
} else if ( rhs.left == null ) {
result = lhs;
} else if ( lhs.right == null ) {
if ( rhs.right == null ) {
if ( canBeMerged(lhs.left, rhs.left) )
result = new EventPair(mergeEvents(lhs.left, rhs.left), null, lhs.intervals, rhs.intervals);
else
result = new EventPair(lhs.left, rhs.left, lhs.intervals, rhs.intervals);
} else {
if ( canBeMerged(lhs.left, rhs.left) )
result = new EventPair(mergeEvents(lhs.left, rhs.left), rhs.right, lhs.intervals, rhs.intervals);
else {
if ( rhs.left.isReportableEvent() )
rhs.intervals.add(rhs.left.getLoc());
result = new EventPair(lhs.left, rhs.right, lhs.intervals, rhs.intervals);
}
}
} else if ( rhs.right == null ) {
if ( canBeMerged(lhs.right, rhs.left) )
result = new EventPair(lhs.left, mergeEvents(lhs.right, rhs.left), lhs.intervals, rhs.intervals);
else {
if ( lhs.right.isReportableEvent() )
lhs.intervals.add(lhs.right.getLoc());
result = new EventPair(lhs.left, rhs.left, lhs.intervals, rhs.intervals);
}
} else {
if ( canBeMerged(lhs.right, rhs.left) ) {
Event merge = mergeEvents(lhs.right, rhs.left);
if ( merge.isReportableEvent() )
lhs.intervals.add(merge.getLoc());
} else {
if ( lhs.right.isReportableEvent() )
lhs.intervals.add(lhs.right.getLoc());
if ( rhs.left.isReportableEvent() )
rhs.intervals.add(rhs.left.getLoc());
}
// if we hit a new contig or they have no overlapping reads, then they are separate events - so clear sum
if ( sum.loc.getContigIndex() != value.loc.getContigIndex() || sum.furthestStopPos < value.loc.getStart() ) {
if ( sum.isReportableEvent() )
out.println(sum.toString());
return value;
result = new EventPair(lhs.left, rhs.right, lhs.intervals, rhs.intervals);
}
return result;
}
public EventPair reduce(Event value, EventPair sum) {
if ( value == null ) {
; // do nothing
} else if ( sum.left == null ) {
sum.left = value;
} else if ( sum.right == null ) {
if ( canBeMerged(sum.left, value) )
sum.left = mergeEvents(sum.left, value);
else
sum.right = value;
} else {
if ( canBeMerged(sum.right, value) )
sum.right = mergeEvents(sum.right, value);
else {
if ( sum.right.isReportableEvent() )
sum.intervals.add(sum.right.getLoc());
sum.right = value;
}
}
// otherwise, merge the two events
sum.merge(value);
return sum;
}
static private boolean canBeMerged(Event left, Event right) {
return left.loc.getContigIndex() == right.loc.getContigIndex() && left.furthestStopPos >= right.loc.getStart();
}
@com.google.java.contract.Requires({"left != null", "right != null"})
static private Event mergeEvents(Event left, Event right) {
left.merge(right);
return left;
}
private enum EVENT_TYPE { POINT_EVENT, INDEL_EVENT, BOTH }
class EventPair {
public Event left, right;
public TreeSet<GenomeLoc> intervals = new TreeSet<GenomeLoc>();
public EventPair(Event left, Event right) {
this.left = left;
this.right = right;
}
public EventPair(Event left, Event right, TreeSet<GenomeLoc> set1, TreeSet<GenomeLoc> set2) {
this.left = left;
this.right = right;
intervals.addAll(set1);
intervals.addAll(set2);
}
}
class Event {
public int furthestStopPos;
public GenomeLoc loc;
public int eventStartPos;
private GenomeLoc loc;
private int eventStartPos;
private int eventStopPos;
private EVENT_TYPE type;
private ArrayList<Integer> pointEvents = new ArrayList<Integer>();
@ -332,6 +415,10 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
eventStartPos = lastPosition;
else
eventStartPos = Math.min(eventStartPos, lastPosition);
} else if ( eventStartPos == -1 && e.eventStartPos != -1 ) {
eventStartPos = e.eventStartPos;
eventStopPos = e.eventStopPos;
furthestStopPos = e.furthestStopPos;
}
}
pointEvents.add(newPosition);
@ -342,8 +429,8 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
return getToolkit().getGenomeLocParser().isValidGenomeLoc(loc.getContig(), eventStartPos, eventStopPos, true) && eventStopPos >= 0 && eventStopPos - eventStartPos < maxIntervalSize;
}
public String toString() {
return String.format("%s:%d-%d", loc.getContig(), eventStartPos, eventStopPos);
public GenomeLoc getLoc() {
return getToolkit().getGenomeLocParser().createGenomeLoc(loc.getContig(), eventStartPos, eventStopPos);
}
}
}

View File

@ -80,7 +80,8 @@ public abstract class BaseTest {
public static final String networkTempDir = "/broad/shptmp/";
public static final File networkTempDirFile = new File(networkTempDir);
public static final String testDir = "public/testdata/";
public static final File testDirFile = new File("public/testdata/");
public static final String testDir = testDirFile.getAbsolutePath() + "/";
/** before the class starts up */
static {

View File

@ -8,20 +8,41 @@ import java.util.Arrays;
public class RealignerTargetCreatorIntegrationTest extends WalkerTest {
@Test
public void testIntervals() {
public void testIntervals1() {
String md5 = "3f0b63a393104d0c4158c7d1538153b8";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
"-T RealignerTargetCreator -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam --mismatchFraction 0.15 -L 1:10,000,000-10,050,000 -o %s",
1,
Arrays.asList("e7accfa58415d6da80383953b1a3a986"));
executeTest("test standard", spec1);
Arrays.asList(md5));
executeTest("test standard nt=1", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
"-T RealignerTargetCreator --known " + b36dbSNP129 + " -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000 -o %s",
"-nt 4 -T RealignerTargetCreator -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam --mismatchFraction 0.15 -L 1:10,000,000-10,050,000 -o %s",
1,
Arrays.asList("0367d39a122c8ac0899fb868a82ef728"));
executeTest("test dbsnp", spec2);
Arrays.asList(md5));
executeTest("test standard nt=4", spec2);
}
@Test
public void testIntervals2() {
String md5 = "e0f745b79b679c225314a2abef4919ff";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
"-T RealignerTargetCreator --known " + b36dbSNP129 + " -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,200,000 -o %s",
1,
Arrays.asList(md5));
executeTest("test with dbsnp nt=1", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
"-nt 4 -T RealignerTargetCreator --known " + b36dbSNP129 + " -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,200,000 -o %s",
1,
Arrays.asList(md5));
executeTest("test with dbsnp nt=4", spec2);
}
@Test
public void testKnownsOnly() {
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
"-T RealignerTargetCreator -R " + b36KGReference + " --known " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -BTI known -o %s",
1,