Merge pull request #1135 from broadinstitute/sl_issue_1056
Added file-extension--dependent interval-list output to RealignerTargetCreator.
This commit is contained in:
commit
4464c04c9c
|
|
@ -51,6 +51,11 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.tools.walkers.indels;
|
package org.broadinstitute.gatk.tools.walkers.indels;
|
||||||
|
|
||||||
|
import htsjdk.samtools.SAMFileHeader;
|
||||||
|
import htsjdk.samtools.util.IOUtil;
|
||||||
|
import htsjdk.samtools.util.Interval;
|
||||||
|
import htsjdk.samtools.util.IntervalList;
|
||||||
|
import org.apache.commons.io.FilenameUtils;
|
||||||
import org.broadinstitute.gatk.engine.walkers.*;
|
import org.broadinstitute.gatk.engine.walkers.*;
|
||||||
import org.broadinstitute.gatk.utils.commandline.Argument;
|
import org.broadinstitute.gatk.utils.commandline.Argument;
|
||||||
import org.broadinstitute.gatk.utils.commandline.Input;
|
import org.broadinstitute.gatk.utils.commandline.Input;
|
||||||
|
|
@ -61,6 +66,7 @@ import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.gatk.engine.filters.*;
|
import org.broadinstitute.gatk.engine.filters.*;
|
||||||
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
|
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
|
||||||
|
import org.broadinstitute.gatk.utils.exceptions.GATKException;
|
||||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||||
|
|
@ -70,7 +76,9 @@ import org.broadinstitute.gatk.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
|
||||||
import htsjdk.variant.variantcontext.VariantContext;
|
import htsjdk.variant.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.BufferedWriter;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
@ -143,7 +151,7 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
|
||||||
* The target intervals for realignment.
|
* The target intervals for realignment.
|
||||||
*/
|
*/
|
||||||
@Output
|
@Output
|
||||||
protected PrintStream out;
|
protected File out;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Any number of VCF files representing known SNPs and/or indels. Could be e.g. dbSNP and/or official 1000 Genomes indel calls.
|
* Any number of VCF files representing known SNPs and/or indels. Could be e.g. dbSNP and/or official 1000 Genomes indel calls.
|
||||||
|
|
@ -276,8 +284,24 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
|
||||||
if ( sum.right != null && sum.right.isReportableEvent() )
|
if ( sum.right != null && sum.right.isReportableEvent() )
|
||||||
sum.intervals.add(sum.right.getLoc());
|
sum.intervals.add(sum.right.getLoc());
|
||||||
|
|
||||||
for ( GenomeLoc loc : sum.intervals )
|
if ( FilenameUtils.getExtension(out.getName()).equals("interval_list") ) {
|
||||||
out.println(loc);
|
final SAMFileHeader masterSequenceDictionaryHeader = new SAMFileHeader();
|
||||||
|
masterSequenceDictionaryHeader.setSequenceDictionary(getToolkit().getMasterSequenceDictionary());
|
||||||
|
final IntervalList intervalList = new IntervalList(masterSequenceDictionaryHeader);
|
||||||
|
for ( GenomeLoc loc : sum.intervals ) {
|
||||||
|
intervalList.add(new Interval(loc.getContig(), loc.getStart(), loc.getStop()));
|
||||||
|
}
|
||||||
|
intervalList.write(out);
|
||||||
|
} else {
|
||||||
|
try ( BufferedWriter bufferedWriter = IOUtil.openFileForBufferedWriting(out) ) {
|
||||||
|
for ( GenomeLoc loc : sum.intervals ) {
|
||||||
|
bufferedWriter.write(loc.toString());
|
||||||
|
bufferedWriter.newLine();
|
||||||
|
}
|
||||||
|
} catch (final IOException e) {
|
||||||
|
throw new GATKException("Error writing out intervals to file: " + out.getAbsolutePath(), e);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public EventPair reduceInit() {
|
public EventPair reduceInit() {
|
||||||
|
|
|
||||||
|
|
@ -51,45 +51,60 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.tools.walkers.indels;
|
package org.broadinstitute.gatk.tools.walkers.indels;
|
||||||
|
|
||||||
|
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||||
|
import htsjdk.samtools.util.Interval;
|
||||||
|
import htsjdk.samtools.util.IntervalList;
|
||||||
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
|
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
|
||||||
|
import org.broadinstitute.gatk.utils.BaseTest;
|
||||||
|
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||||
|
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||||
|
import org.broadinstitute.gatk.utils.interval.IntervalUtils;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
public class RealignerTargetCreatorIntegrationTest extends WalkerTest {
|
public class RealignerTargetCreatorIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test
|
@DataProvider(name = "intervals1")
|
||||||
public void testIntervals1() {
|
public Object[][] intervals1() {
|
||||||
String md5 = "3f0b63a393104d0c4158c7d1538153b8";
|
String arguments = "-T RealignerTargetCreator -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam --mismatchFraction 0.15 -L 1:10,000,000-10,050,000";
|
||||||
|
return new Object[][]{
|
||||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
{"test standard nt=1", arguments},
|
||||||
"-T RealignerTargetCreator -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam --mismatchFraction 0.15 -L 1:10,000,000-10,050,000 -o %s",
|
{"test standard nt=4", "-nt 4 " + arguments}
|
||||||
1,
|
};
|
||||||
Arrays.asList(md5));
|
|
||||||
executeTest("test standard nt=1", spec1);
|
|
||||||
|
|
||||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
|
||||||
"-nt 4 -T RealignerTargetCreator -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam --mismatchFraction 0.15 -L 1:10,000,000-10,050,000 -o %s",
|
|
||||||
1,
|
|
||||||
Arrays.asList(md5));
|
|
||||||
executeTest("test standard nt=4", spec2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@DataProvider(name = "intervals2")
|
||||||
public void testIntervals2() {
|
public Object[][] intervals2() {
|
||||||
|
String arguments = "-T RealignerTargetCreator --known " + b36dbSNP129 + " -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,200,000";
|
||||||
|
return new Object[][]{
|
||||||
|
{"test with dbsnp nt=1", arguments},
|
||||||
|
{"test with dbsnp nt=4", "-nt 4 " + arguments}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "intervals1")
|
||||||
|
public void testIntervals1(String testName, String arguments) {
|
||||||
|
String md5 = "3f0b63a393104d0c4158c7d1538153b8";
|
||||||
|
|
||||||
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(arguments + " -o %s", 1, Arrays.asList(md5));
|
||||||
|
executeTest(testName, spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "intervals2")
|
||||||
|
public void testIntervals2(String testName, String arguments) {
|
||||||
String md5 = "d073237694175c75d37bd4f40b8c64db";
|
String md5 = "d073237694175c75d37bd4f40b8c64db";
|
||||||
|
|
||||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(arguments + " -o %s", 1, Arrays.asList(md5));
|
||||||
"-T RealignerTargetCreator --known " + b36dbSNP129 + " -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,200,000 -o %s",
|
executeTest(testName, spec);
|
||||||
1,
|
|
||||||
Arrays.asList(md5));
|
|
||||||
executeTest("test with dbsnp nt=1", spec1);
|
|
||||||
|
|
||||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
|
||||||
"-nt 4 -T RealignerTargetCreator --known " + b36dbSNP129 + " -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,200,000 -o %s",
|
|
||||||
1,
|
|
||||||
Arrays.asList(md5));
|
|
||||||
executeTest("test with dbsnp nt=4", spec2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -110,4 +125,35 @@ public class RealignerTargetCreatorIntegrationTest extends WalkerTest {
|
||||||
Arrays.asList(""));
|
Arrays.asList(""));
|
||||||
executeTest("test bad cigar string string does not fail", spec);
|
executeTest("test bad cigar string string does not fail", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "intervals1")
|
||||||
|
public void testTargetListAgainstIntervalList(String testName, String arguments) throws IOException {
|
||||||
|
final List<String> md5 = Collections.emptyList();
|
||||||
|
final File targetListFile = createTempFile("RTCTest", ".targets");
|
||||||
|
final File intervalListFile = createTempFile("RTCTest", ".interval_list");
|
||||||
|
|
||||||
|
WalkerTest.WalkerTestSpec targetListSpec = new WalkerTest.WalkerTestSpec(arguments, 1, md5);
|
||||||
|
WalkerTest.WalkerTestSpec intervalListSpec = new WalkerTest.WalkerTestSpec(arguments, 1, md5);
|
||||||
|
|
||||||
|
targetListSpec.setOutputFileLocation(targetListFile);
|
||||||
|
intervalListSpec.setOutputFileLocation(intervalListFile);
|
||||||
|
|
||||||
|
executeTest(testName + " (compare target-list and interval-list output)", targetListSpec);
|
||||||
|
executeTest(testName + " (compare target-list and interval-list output)", intervalListSpec);
|
||||||
|
|
||||||
|
final ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference));
|
||||||
|
final GenomeLocParser hg19GenomeLocParser = new GenomeLocParser(seq);
|
||||||
|
final List<GenomeLoc> targetList = IntervalUtils.intervalFileToList(hg19GenomeLocParser,
|
||||||
|
targetListFile.getAbsolutePath());
|
||||||
|
final List<Interval> targetListResult = new ArrayList<>();
|
||||||
|
for ( GenomeLoc target : targetList ) {
|
||||||
|
targetListResult.add(new Interval(target.getContig(), target.getStart(), target.getStop()));
|
||||||
|
}
|
||||||
|
|
||||||
|
final List<Interval> intervalListResult = IntervalList.fromFile(intervalListFile).getIntervals();
|
||||||
|
|
||||||
|
Assert.assertFalse(targetListResult.isEmpty());
|
||||||
|
Assert.assertFalse(intervalListResult.isEmpty());
|
||||||
|
Assert.assertEquals(targetListResult, intervalListResult);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue