Added file-extension--dependent interval-list output to RealignerTargetCreator.

This commit is contained in:
Samuel Lee 2015-08-12 14:19:04 -04:00
parent 687d2d0244
commit 41256e1405
2 changed files with 102 additions and 32 deletions

View File

@ -51,6 +51,11 @@
package org.broadinstitute.gatk.tools.walkers.indels;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.IntervalList;
import org.apache.commons.io.FilenameUtils;
import org.broadinstitute.gatk.engine.walkers.*;
import org.broadinstitute.gatk.utils.commandline.Argument;
import org.broadinstitute.gatk.utils.commandline.Input;
@ -61,6 +66,7 @@ import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.engine.filters.*;
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
import org.broadinstitute.gatk.utils.exceptions.GATKException;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.exceptions.UserException;
@ -70,7 +76,9 @@ import org.broadinstitute.gatk.utils.pileup.PileupElement;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
import htsjdk.variant.variantcontext.VariantContext;
import java.io.PrintStream;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
@ -143,7 +151,7 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
* The target intervals for realignment.
*/
@Output
protected PrintStream out;
protected File out;
/**
* Any number of VCF files representing known SNPs and/or indels. Could be e.g. dbSNP and/or official 1000 Genomes indel calls.
@ -276,8 +284,24 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
if ( sum.right != null && sum.right.isReportableEvent() )
sum.intervals.add(sum.right.getLoc());
for ( GenomeLoc loc : sum.intervals )
out.println(loc);
if ( FilenameUtils.getExtension(out.getName()).equals("interval_list") ) {
final SAMFileHeader masterSequenceDictionaryHeader = new SAMFileHeader();
masterSequenceDictionaryHeader.setSequenceDictionary(getToolkit().getMasterSequenceDictionary());
final IntervalList intervalList = new IntervalList(masterSequenceDictionaryHeader);
for ( GenomeLoc loc : sum.intervals ) {
intervalList.add(new Interval(loc.getContig(), loc.getStart(), loc.getStop()));
}
intervalList.write(out);
} else {
try ( BufferedWriter bufferedWriter = IOUtil.openFileForBufferedWriting(out) ) {
for ( GenomeLoc loc : sum.intervals ) {
bufferedWriter.write(loc.toString());
bufferedWriter.newLine();
}
} catch (final IOException e) {
throw new GATKException("Error writing out intervals to file: " + out.getAbsolutePath(), e);
}
}
}
public EventPair reduceInit() {

View File

@ -51,45 +51,60 @@
package org.broadinstitute.gatk.tools.walkers.indels;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.IntervalList;
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.gatk.utils.interval.IntervalUtils;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
public class RealignerTargetCreatorIntegrationTest extends WalkerTest {
@Test
public void testIntervals1() {
String md5 = "3f0b63a393104d0c4158c7d1538153b8";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
"-T RealignerTargetCreator -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam --mismatchFraction 0.15 -L 1:10,000,000-10,050,000 -o %s",
1,
Arrays.asList(md5));
executeTest("test standard nt=1", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
"-nt 4 -T RealignerTargetCreator -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam --mismatchFraction 0.15 -L 1:10,000,000-10,050,000 -o %s",
1,
Arrays.asList(md5));
executeTest("test standard nt=4", spec2);
@DataProvider(name = "intervals1")
public Object[][] intervals1() {
String arguments = "-T RealignerTargetCreator -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam --mismatchFraction 0.15 -L 1:10,000,000-10,050,000";
return new Object[][]{
{"test standard nt=1", arguments},
{"test standard nt=4", "-nt 4 " + arguments}
};
}
@Test
public void testIntervals2() {
@DataProvider(name = "intervals2")
public Object[][] intervals2() {
String arguments = "-T RealignerTargetCreator --known " + b36dbSNP129 + " -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,200,000";
return new Object[][]{
{"test with dbsnp nt=1", arguments},
{"test with dbsnp nt=4", "-nt 4 " + arguments}
};
}
@Test(dataProvider = "intervals1")
public void testIntervals1(String testName, String arguments) {
String md5 = "3f0b63a393104d0c4158c7d1538153b8";
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(arguments + " -o %s", 1, Arrays.asList(md5));
executeTest(testName, spec);
}
@Test(dataProvider = "intervals2")
public void testIntervals2(String testName, String arguments) {
String md5 = "d073237694175c75d37bd4f40b8c64db";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
"-T RealignerTargetCreator --known " + b36dbSNP129 + " -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,200,000 -o %s",
1,
Arrays.asList(md5));
executeTest("test with dbsnp nt=1", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
"-nt 4 -T RealignerTargetCreator --known " + b36dbSNP129 + " -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,200,000 -o %s",
1,
Arrays.asList(md5));
executeTest("test with dbsnp nt=4", spec2);
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(arguments + " -o %s", 1, Arrays.asList(md5));
executeTest(testName, spec);
}
@Test
@ -110,4 +125,35 @@ public class RealignerTargetCreatorIntegrationTest extends WalkerTest {
Arrays.asList(""));
executeTest("test bad cigar string string does not fail", spec);
}
@Test(dataProvider = "intervals1")
public void testTargetListAgainstIntervalList(String testName, String arguments) throws IOException {
final List<String> md5 = Collections.emptyList();
final File targetListFile = createTempFile("RTCTest", ".targets");
final File intervalListFile = createTempFile("RTCTest", ".interval_list");
WalkerTest.WalkerTestSpec targetListSpec = new WalkerTest.WalkerTestSpec(arguments, 1, md5);
WalkerTest.WalkerTestSpec intervalListSpec = new WalkerTest.WalkerTestSpec(arguments, 1, md5);
targetListSpec.setOutputFileLocation(targetListFile);
intervalListSpec.setOutputFileLocation(intervalListFile);
executeTest(testName + " (compare target-list and interval-list output)", targetListSpec);
executeTest(testName + " (compare target-list and interval-list output)", intervalListSpec);
final ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference));
final GenomeLocParser hg19GenomeLocParser = new GenomeLocParser(seq);
final List<GenomeLoc> targetList = IntervalUtils.intervalFileToList(hg19GenomeLocParser,
targetListFile.getAbsolutePath());
final List<Interval> targetListResult = new ArrayList<>();
for ( GenomeLoc target : targetList ) {
targetListResult.add(new Interval(target.getContig(), target.getStart(), target.getStop()));
}
final List<Interval> intervalListResult = IntervalList.fromFile(intervalListFile).getIntervals();
Assert.assertFalse(targetListResult.isEmpty());
Assert.assertFalse(intervalListResult.isEmpty());
Assert.assertEquals(targetListResult, intervalListResult);
}
}