Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
6fbd41724a
|
|
@ -138,7 +138,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
* Any number of VCF files representing known indels to be used for constructing alternate consensuses.
|
* Any number of VCF files representing known indels to be used for constructing alternate consensuses.
|
||||||
* Could be e.g. dbSNP and/or official 1000 Genomes indel calls. Non-indel variants in these files will be ignored.
|
* Could be e.g. dbSNP and/or official 1000 Genomes indel calls. Non-indel variants in these files will be ignored.
|
||||||
*/
|
*/
|
||||||
@Input(fullName="known", shortName = "known", doc="Input VCF file(s) with known indels", required=false)
|
@Input(fullName="knownAlleles", shortName = "known", doc="Input VCF file(s) with known indels", required=false)
|
||||||
public List<RodBinding<VariantContext>> known = Collections.emptyList();
|
public List<RodBinding<VariantContext>> known = Collections.emptyList();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -50,6 +50,7 @@ import java.io.PrintStream;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.TreeSet;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Emits intervals for the Local Indel Realigner to target for realignment.
|
* Emits intervals for the Local Indel Realigner to target for realignment.
|
||||||
|
|
@ -103,7 +104,7 @@ import java.util.List;
|
||||||
@Allows(value={DataSource.READS, DataSource.REFERENCE})
|
@Allows(value={DataSource.READS, DataSource.REFERENCE})
|
||||||
@By(DataSource.REFERENCE)
|
@By(DataSource.REFERENCE)
|
||||||
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
|
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
|
||||||
public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Event, RealignerTargetCreator.Event> {
|
public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Event, RealignerTargetCreator.EventPair> implements TreeReducible<RealignerTargetCreator.EventPair> {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The target intervals for realignment.
|
* The target intervals for realignment.
|
||||||
|
|
@ -251,43 +252,125 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
|
||||||
return new Event(eventLoc, furthestStopPos, eventType);
|
return new Event(eventLoc, furthestStopPos, eventType);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void onTraversalDone(Event sum) {
|
public void onTraversalDone(EventPair sum) {
|
||||||
if ( sum != null && sum.isReportableEvent() )
|
if ( sum.left != null && sum.left.isReportableEvent() )
|
||||||
out.println(sum.toString());
|
sum.intervals.add(sum.left.getLoc());
|
||||||
|
if ( sum.right != null && sum.right.isReportableEvent() )
|
||||||
|
sum.intervals.add(sum.right.getLoc());
|
||||||
|
|
||||||
|
for ( GenomeLoc loc : sum.intervals )
|
||||||
|
out.println(loc);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Event reduceInit() {
|
public EventPair reduceInit() {
|
||||||
return null;
|
return new EventPair(null, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Event reduce(Event value, Event sum) {
|
public EventPair treeReduce(EventPair lhs, EventPair rhs) {
|
||||||
// ignore no new events
|
EventPair result;
|
||||||
if ( value == null )
|
|
||||||
return sum;
|
|
||||||
|
|
||||||
// if it's the first good value, use it
|
if ( lhs.left == null ) {
|
||||||
if ( sum == null )
|
result = rhs;
|
||||||
return value;
|
} else if ( rhs.left == null ) {
|
||||||
|
result = lhs;
|
||||||
|
} else if ( lhs.right == null ) {
|
||||||
|
if ( rhs.right == null ) {
|
||||||
|
if ( canBeMerged(lhs.left, rhs.left) )
|
||||||
|
result = new EventPair(mergeEvents(lhs.left, rhs.left), null, lhs.intervals, rhs.intervals);
|
||||||
|
else
|
||||||
|
result = new EventPair(lhs.left, rhs.left, lhs.intervals, rhs.intervals);
|
||||||
|
} else {
|
||||||
|
if ( canBeMerged(lhs.left, rhs.left) )
|
||||||
|
result = new EventPair(mergeEvents(lhs.left, rhs.left), rhs.right, lhs.intervals, rhs.intervals);
|
||||||
|
else {
|
||||||
|
if ( rhs.left.isReportableEvent() )
|
||||||
|
rhs.intervals.add(rhs.left.getLoc());
|
||||||
|
result = new EventPair(lhs.left, rhs.right, lhs.intervals, rhs.intervals);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if ( rhs.right == null ) {
|
||||||
|
if ( canBeMerged(lhs.right, rhs.left) )
|
||||||
|
result = new EventPair(lhs.left, mergeEvents(lhs.right, rhs.left), lhs.intervals, rhs.intervals);
|
||||||
|
else {
|
||||||
|
if ( lhs.right.isReportableEvent() )
|
||||||
|
lhs.intervals.add(lhs.right.getLoc());
|
||||||
|
result = new EventPair(lhs.left, rhs.left, lhs.intervals, rhs.intervals);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if ( canBeMerged(lhs.right, rhs.left) ) {
|
||||||
|
Event merge = mergeEvents(lhs.right, rhs.left);
|
||||||
|
if ( merge.isReportableEvent() )
|
||||||
|
lhs.intervals.add(merge.getLoc());
|
||||||
|
} else {
|
||||||
|
if ( lhs.right.isReportableEvent() )
|
||||||
|
lhs.intervals.add(lhs.right.getLoc());
|
||||||
|
if ( rhs.left.isReportableEvent() )
|
||||||
|
rhs.intervals.add(rhs.left.getLoc());
|
||||||
|
}
|
||||||
|
|
||||||
// if we hit a new contig or they have no overlapping reads, then they are separate events - so clear sum
|
result = new EventPair(lhs.left, rhs.right, lhs.intervals, rhs.intervals);
|
||||||
if ( sum.loc.getContigIndex() != value.loc.getContigIndex() || sum.furthestStopPos < value.loc.getStart() ) {
|
}
|
||||||
if ( sum.isReportableEvent() )
|
|
||||||
out.println(sum.toString());
|
return result;
|
||||||
return value;
|
}
|
||||||
|
|
||||||
|
public EventPair reduce(Event value, EventPair sum) {
|
||||||
|
if ( value == null ) {
|
||||||
|
; // do nothing
|
||||||
|
} else if ( sum.left == null ) {
|
||||||
|
sum.left = value;
|
||||||
|
} else if ( sum.right == null ) {
|
||||||
|
if ( canBeMerged(sum.left, value) )
|
||||||
|
sum.left = mergeEvents(sum.left, value);
|
||||||
|
else
|
||||||
|
sum.right = value;
|
||||||
|
} else {
|
||||||
|
if ( canBeMerged(sum.right, value) )
|
||||||
|
sum.right = mergeEvents(sum.right, value);
|
||||||
|
else {
|
||||||
|
if ( sum.right.isReportableEvent() )
|
||||||
|
sum.intervals.add(sum.right.getLoc());
|
||||||
|
sum.right = value;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// otherwise, merge the two events
|
|
||||||
sum.merge(value);
|
|
||||||
return sum;
|
return sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static private boolean canBeMerged(Event left, Event right) {
|
||||||
|
return left.loc.getContigIndex() == right.loc.getContigIndex() && left.furthestStopPos >= right.loc.getStart();
|
||||||
|
}
|
||||||
|
|
||||||
|
@com.google.java.contract.Requires({"left != null", "right != null"})
|
||||||
|
static private Event mergeEvents(Event left, Event right) {
|
||||||
|
left.merge(right);
|
||||||
|
return left;
|
||||||
|
}
|
||||||
|
|
||||||
private enum EVENT_TYPE { POINT_EVENT, INDEL_EVENT, BOTH }
|
private enum EVENT_TYPE { POINT_EVENT, INDEL_EVENT, BOTH }
|
||||||
|
|
||||||
|
class EventPair {
|
||||||
|
public Event left, right;
|
||||||
|
public TreeSet<GenomeLoc> intervals = new TreeSet<GenomeLoc>();
|
||||||
|
|
||||||
|
public EventPair(Event left, Event right) {
|
||||||
|
this.left = left;
|
||||||
|
this.right = right;
|
||||||
|
}
|
||||||
|
|
||||||
|
public EventPair(Event left, Event right, TreeSet<GenomeLoc> set1, TreeSet<GenomeLoc> set2) {
|
||||||
|
this.left = left;
|
||||||
|
this.right = right;
|
||||||
|
intervals.addAll(set1);
|
||||||
|
intervals.addAll(set2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
class Event {
|
class Event {
|
||||||
public int furthestStopPos;
|
public int furthestStopPos;
|
||||||
|
|
||||||
public GenomeLoc loc;
|
private GenomeLoc loc;
|
||||||
public int eventStartPos;
|
private int eventStartPos;
|
||||||
private int eventStopPos;
|
private int eventStopPos;
|
||||||
private EVENT_TYPE type;
|
private EVENT_TYPE type;
|
||||||
private ArrayList<Integer> pointEvents = new ArrayList<Integer>();
|
private ArrayList<Integer> pointEvents = new ArrayList<Integer>();
|
||||||
|
|
@ -332,6 +415,10 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
|
||||||
eventStartPos = lastPosition;
|
eventStartPos = lastPosition;
|
||||||
else
|
else
|
||||||
eventStartPos = Math.min(eventStartPos, lastPosition);
|
eventStartPos = Math.min(eventStartPos, lastPosition);
|
||||||
|
} else if ( eventStartPos == -1 && e.eventStartPos != -1 ) {
|
||||||
|
eventStartPos = e.eventStartPos;
|
||||||
|
eventStopPos = e.eventStopPos;
|
||||||
|
furthestStopPos = e.furthestStopPos;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pointEvents.add(newPosition);
|
pointEvents.add(newPosition);
|
||||||
|
|
@ -342,8 +429,8 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
|
||||||
return getToolkit().getGenomeLocParser().isValidGenomeLoc(loc.getContig(), eventStartPos, eventStopPos, true) && eventStopPos >= 0 && eventStopPos - eventStartPos < maxIntervalSize;
|
return getToolkit().getGenomeLocParser().isValidGenomeLoc(loc.getContig(), eventStartPos, eventStopPos, true) && eventStopPos >= 0 && eventStopPos - eventStartPos < maxIntervalSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString() {
|
public GenomeLoc getLoc() {
|
||||||
return String.format("%s:%d-%d", loc.getContig(), eventStartPos, eventStopPos);
|
return getToolkit().getGenomeLocParser().createGenomeLoc(loc.getContig(), eventStartPos, eventStopPos);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -80,7 +80,8 @@ public abstract class BaseTest {
|
||||||
public static final String networkTempDir = "/broad/shptmp/";
|
public static final String networkTempDir = "/broad/shptmp/";
|
||||||
public static final File networkTempDirFile = new File(networkTempDir);
|
public static final File networkTempDirFile = new File(networkTempDir);
|
||||||
|
|
||||||
public static final String testDir = "public/testdata/";
|
public static final File testDirFile = new File("public/testdata/");
|
||||||
|
public static final String testDir = testDirFile.getAbsolutePath() + "/";
|
||||||
|
|
||||||
/** before the class starts up */
|
/** before the class starts up */
|
||||||
static {
|
static {
|
||||||
|
|
|
||||||
|
|
@ -8,20 +8,41 @@ import java.util.Arrays;
|
||||||
public class RealignerTargetCreatorIntegrationTest extends WalkerTest {
|
public class RealignerTargetCreatorIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testIntervals() {
|
public void testIntervals1() {
|
||||||
|
String md5 = "3f0b63a393104d0c4158c7d1538153b8";
|
||||||
|
|
||||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||||
"-T RealignerTargetCreator -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam --mismatchFraction 0.15 -L 1:10,000,000-10,050,000 -o %s",
|
"-T RealignerTargetCreator -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam --mismatchFraction 0.15 -L 1:10,000,000-10,050,000 -o %s",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("e7accfa58415d6da80383953b1a3a986"));
|
Arrays.asList(md5));
|
||||||
executeTest("test standard", spec1);
|
executeTest("test standard nt=1", spec1);
|
||||||
|
|
||||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||||
"-T RealignerTargetCreator --known " + b36dbSNP129 + " -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000 -o %s",
|
"-nt 4 -T RealignerTargetCreator -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam --mismatchFraction 0.15 -L 1:10,000,000-10,050,000 -o %s",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("0367d39a122c8ac0899fb868a82ef728"));
|
Arrays.asList(md5));
|
||||||
executeTest("test dbsnp", spec2);
|
executeTest("test standard nt=4", spec2);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testIntervals2() {
|
||||||
|
String md5 = "e0f745b79b679c225314a2abef4919ff";
|
||||||
|
|
||||||
|
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||||
|
"-T RealignerTargetCreator --known " + b36dbSNP129 + " -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,200,000 -o %s",
|
||||||
|
1,
|
||||||
|
Arrays.asList(md5));
|
||||||
|
executeTest("test with dbsnp nt=1", spec1);
|
||||||
|
|
||||||
|
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||||
|
"-nt 4 -T RealignerTargetCreator --known " + b36dbSNP129 + " -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,200,000 -o %s",
|
||||||
|
1,
|
||||||
|
Arrays.asList(md5));
|
||||||
|
executeTest("test with dbsnp nt=4", spec2);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testKnownsOnly() {
|
||||||
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
|
||||||
"-T RealignerTargetCreator -R " + b36KGReference + " --known " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -BTI known -o %s",
|
"-T RealignerTargetCreator -R " + b36KGReference + " --known " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -BTI known -o %s",
|
||||||
1,
|
1,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue