Merge pull request #1135 from broadinstitute/sl_issue_1056
Added file-extension--dependent interval-list output to RealignerTargetCreator.
This commit is contained in:
commit
4464c04c9c
|
|
@ -51,6 +51,11 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.indels;
|
||||
|
||||
import htsjdk.samtools.SAMFileHeader;
|
||||
import htsjdk.samtools.util.IOUtil;
|
||||
import htsjdk.samtools.util.Interval;
|
||||
import htsjdk.samtools.util.IntervalList;
|
||||
import org.apache.commons.io.FilenameUtils;
|
||||
import org.broadinstitute.gatk.engine.walkers.*;
|
||||
import org.broadinstitute.gatk.utils.commandline.Argument;
|
||||
import org.broadinstitute.gatk.utils.commandline.Input;
|
||||
|
|
@ -61,6 +66,7 @@ import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
|||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.filters.*;
|
||||
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
|
||||
import org.broadinstitute.gatk.utils.exceptions.GATKException;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
|
|
@ -70,7 +76,9 @@ import org.broadinstitute.gatk.utils.pileup.PileupElement;
|
|||
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
|
@ -143,7 +151,7 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
|
|||
* The target intervals for realignment.
|
||||
*/
|
||||
@Output
|
||||
protected PrintStream out;
|
||||
protected File out;
|
||||
|
||||
/**
|
||||
* Any number of VCF files representing known SNPs and/or indels. Could be e.g. dbSNP and/or official 1000 Genomes indel calls.
|
||||
|
|
@ -276,8 +284,24 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
|
|||
if ( sum.right != null && sum.right.isReportableEvent() )
|
||||
sum.intervals.add(sum.right.getLoc());
|
||||
|
||||
for ( GenomeLoc loc : sum.intervals )
|
||||
out.println(loc);
|
||||
if ( FilenameUtils.getExtension(out.getName()).equals("interval_list") ) {
|
||||
final SAMFileHeader masterSequenceDictionaryHeader = new SAMFileHeader();
|
||||
masterSequenceDictionaryHeader.setSequenceDictionary(getToolkit().getMasterSequenceDictionary());
|
||||
final IntervalList intervalList = new IntervalList(masterSequenceDictionaryHeader);
|
||||
for ( GenomeLoc loc : sum.intervals ) {
|
||||
intervalList.add(new Interval(loc.getContig(), loc.getStart(), loc.getStop()));
|
||||
}
|
||||
intervalList.write(out);
|
||||
} else {
|
||||
try ( BufferedWriter bufferedWriter = IOUtil.openFileForBufferedWriting(out) ) {
|
||||
for ( GenomeLoc loc : sum.intervals ) {
|
||||
bufferedWriter.write(loc.toString());
|
||||
bufferedWriter.newLine();
|
||||
}
|
||||
} catch (final IOException e) {
|
||||
throw new GATKException("Error writing out intervals to file: " + out.getAbsolutePath(), e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public EventPair reduceInit() {
|
||||
|
|
|
|||
|
|
@ -51,45 +51,60 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.indels;
|
||||
|
||||
import htsjdk.samtools.reference.ReferenceSequenceFile;
|
||||
import htsjdk.samtools.util.Interval;
|
||||
import htsjdk.samtools.util.IntervalList;
|
||||
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.interval.IntervalUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
public class RealignerTargetCreatorIntegrationTest extends WalkerTest {
|
||||
|
||||
@Test
|
||||
public void testIntervals1() {
|
||||
String md5 = "3f0b63a393104d0c4158c7d1538153b8";
|
||||
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
"-T RealignerTargetCreator -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam --mismatchFraction 0.15 -L 1:10,000,000-10,050,000 -o %s",
|
||||
1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("test standard nt=1", spec1);
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
"-nt 4 -T RealignerTargetCreator -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam --mismatchFraction 0.15 -L 1:10,000,000-10,050,000 -o %s",
|
||||
1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("test standard nt=4", spec2);
|
||||
/**
 * Data provider "intervals1": two RealignerTargetCreator command lines over the same
 * BAM, reference and 1:10,000,000-10,050,000 interval, one as-is and one prefixed with "-nt 4".
 *
 * @return rows of {test name, walker arguments}; no output option is included here
 */
@DataProvider(name = "intervals1")
|
||||
public Object[][] intervals1() {
|
||||
String arguments = "-T RealignerTargetCreator -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam --mismatchFraction 0.15 -L 1:10,000,000-10,050,000";
|
||||
return new Object[][]{
|
||||
{"test standard nt=1", arguments},
|
||||
{"test standard nt=4", "-nt 4 " + arguments}
|
||||
};
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIntervals2() {
|
||||
/**
 * Data provider "intervals2": two RealignerTargetCreator command lines that add a
 * --known file (b36dbSNP129) over 1:10,000,000-10,200,000, one as-is and one prefixed with "-nt 4".
 *
 * @return rows of {test name, walker arguments}; no output option is included here
 */
@DataProvider(name = "intervals2")
|
||||
public Object[][] intervals2() {
|
||||
String arguments = "-T RealignerTargetCreator --known " + b36dbSNP129 + " -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,200,000";
|
||||
return new Object[][]{
|
||||
{"test with dbsnp nt=1", arguments},
|
||||
{"test with dbsnp nt=4", "-nt 4 " + arguments}
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Runs each "intervals1" command line with " -o %s" appended and expects one output
 * file matching the fixed MD5 below.
 *
 * @param testName  name passed to executeTest
 * @param arguments walker arguments without the output option
 */
@Test(dataProvider = "intervals1")
|
||||
public void testIntervals1(String testName, String arguments) {
|
||||
// The same checksum is expected for every row of the provider.
String md5 = "3f0b63a393104d0c4158c7d1538153b8";
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(arguments + " -o %s", 1, Arrays.asList(md5));
|
||||
executeTest(testName, spec);
|
||||
}
|
||||
|
||||
/**
 * Runs each "intervals2" command line with " -o %s" appended and expects one output
 * file matching the fixed MD5 below.
 *
 * NOTE(review): the spec1/spec2 statements in this span look like removed lines of the
 * diff rendering interleaved with the data-provider-driven replacement (spec) — confirm
 * against the committed file before relying on this listing.
 *
 * @param testName  name passed to the final executeTest call
 * @param arguments walker arguments without the output option
 */
@Test(dataProvider = "intervals2")
|
||||
public void testIntervals2(String testName, String arguments) {
|
||||
String md5 = "d073237694175c75d37bd4f40b8c64db";
|
||||
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
"-T RealignerTargetCreator --known " + b36dbSNP129 + " -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,200,000 -o %s",
|
||||
1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("test with dbsnp nt=1", spec1);
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
"-nt 4 -T RealignerTargetCreator --known " + b36dbSNP129 + " -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,200,000 -o %s",
|
||||
1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("test with dbsnp nt=4", spec2);
|
||||
// Provider-driven form: one spec built from the supplied arguments.
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(arguments + " -o %s", 1, Arrays.asList(md5));
|
||||
executeTest(testName, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -110,4 +125,35 @@ public class RealignerTargetCreatorIntegrationTest extends WalkerTest {
|
|||
Arrays.asList(""));
|
||||
executeTest("test bad cigar string string does not fail", spec);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "intervals1")
|
||||
public void testTargetListAgainstIntervalList(String testName, String arguments) throws IOException {
|
||||
final List<String> md5 = Collections.emptyList();
|
||||
final File targetListFile = createTempFile("RTCTest", ".targets");
|
||||
final File intervalListFile = createTempFile("RTCTest", ".interval_list");
|
||||
|
||||
WalkerTest.WalkerTestSpec targetListSpec = new WalkerTest.WalkerTestSpec(arguments, 1, md5);
|
||||
WalkerTest.WalkerTestSpec intervalListSpec = new WalkerTest.WalkerTestSpec(arguments, 1, md5);
|
||||
|
||||
targetListSpec.setOutputFileLocation(targetListFile);
|
||||
intervalListSpec.setOutputFileLocation(intervalListFile);
|
||||
|
||||
executeTest(testName + " (compare target-list and interval-list output)", targetListSpec);
|
||||
executeTest(testName + " (compare target-list and interval-list output)", intervalListSpec);
|
||||
|
||||
final ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference));
|
||||
final GenomeLocParser hg19GenomeLocParser = new GenomeLocParser(seq);
|
||||
final List<GenomeLoc> targetList = IntervalUtils.intervalFileToList(hg19GenomeLocParser,
|
||||
targetListFile.getAbsolutePath());
|
||||
final List<Interval> targetListResult = new ArrayList<>();
|
||||
for ( GenomeLoc target : targetList ) {
|
||||
targetListResult.add(new Interval(target.getContig(), target.getStart(), target.getStop()));
|
||||
}
|
||||
|
||||
final List<Interval> intervalListResult = IntervalList.fromFile(intervalListFile).getIntervals();
|
||||
|
||||
Assert.assertFalse(targetListResult.isEmpty());
|
||||
Assert.assertFalse(intervalListResult.isEmpty());
|
||||
Assert.assertEquals(targetListResult, intervalListResult);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue