From 6e46b3696eb7e64bb15cc81620a414d19d2d5a95 Mon Sep 17 00:00:00 2001
From: Ron Levine
Date: Mon, 6 Jul 2015 12:30:53 -0400
Subject: [PATCH] Merge contiguous intervals properly
---
...astaAlternateReferenceIntegrationTest.java | 53 +++++++++++++++++--
.../walkers/fasta/FastaReferenceMaker.java | 18 +++++--
2 files changed, 63 insertions(+), 8 deletions(-)
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/fasta/FastaAlternateReferenceIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/fasta/FastaAlternateReferenceIntegrationTest.java
index 6d1c4b0e3..765deb0e5 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/fasta/FastaAlternateReferenceIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/fasta/FastaAlternateReferenceIntegrationTest.java
@@ -59,23 +59,68 @@ import java.util.Arrays;
public class FastaAlternateReferenceIntegrationTest extends WalkerTest {
+ private static String CONTIGUOUS_INTERVAL_SAME_CONTIG_MD5 = "e1f4b93f9071d158d94dc4fb25e07702";
+ private static String CONTIGUOUS_INTERVAL_DIFF_CONTIG_MD5 = "dfca4e0b0fe0cb18596ec51af541a69e";
+
@Test
public void testReferenceOnly() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T FastaReferenceMaker -R " + b36KGReference + " -L 1:10,000,100-10,000,500 -L 1:10,100,000-10,101,000 -L 1:10,900,000-10,900,001 -o %s",
1,
- Arrays.asList("328d2d52cedfdc52da7d1abff487633d"));
+ Arrays.asList("75d4d352a9ce4fae22fd7924a42c800a"));
executeTest("test FastaReference", spec);
}
+ @Test
+ public void testReferenceOnlyContiguousSameContig() {
+
+ WalkerTestSpec spec = new WalkerTestSpec(
+ "-T FastaReferenceMaker -R " + b36KGReference + " -L 1:10,000,100-10,000,200 -L 1:10,000,201-10,000,301 -o %s",
+ 1,
+ Arrays.asList(CONTIGUOUS_INTERVAL_SAME_CONTIG_MD5));
+ executeTest("test FastaReference with contiguous intervals, same contig", spec);
+ }
+
+ @Test
+ public void testReferenceOnlyContiguousDiffContigs() {
+
+ WalkerTestSpec spec = new WalkerTestSpec(
+ "-T FastaReferenceMaker -R " + b36KGReference + " -L 1:10,000,100-10,000,200 -L 2:10,000,201-10,000,301 -o %s",
+ 1,
+ Arrays.asList(CONTIGUOUS_INTERVAL_DIFF_CONTIG_MD5));
+ executeTest("test FastaReference with contiguous intervals, different contigs", spec);
+ }
+
+ @Test
+ public void testAlternateReferenceContiguousSameContig() {
+ // Show that FastaAlternateReferenceMaker behaves the same as FastaReferenceMaker across contiguous intervals on the same contig.
+ // Note that there are variant locations in this interval.
+ WalkerTestSpec spec = new WalkerTestSpec(
+ "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -L 1:10,000,100-10,000,200 -L 1:10,000,201-10,000,301 -o %s",
+ 1,
+ Arrays.asList(CONTIGUOUS_INTERVAL_SAME_CONTIG_MD5));
+ executeTest("test Alternate FastaReference with contiguous intervals, same contig", spec);
+ }
+
+ @Test
+ public void testAlternateReferenceContiguousDiffContigs() {
+ // Show that FastaAlternateReferenceMaker behaves the same as FastaReferenceMaker across contiguous intervals on different contigs.
+ // Note that there are variant locations in this interval.
+ WalkerTestSpec spec = new WalkerTestSpec(
+ "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -L 1:10,000,100-10,000,200 -L 2:10,000,201-10,000,301 -o %s",
+ 1,
+ Arrays.asList(CONTIGUOUS_INTERVAL_DIFF_CONTIG_MD5));
+ executeTest("test Alternate FastaReference with contiguous intervals, different contigs", spec);
+ }
+
@Test
public void testIndelsAndSnpMask() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380 -L 1:10,093,447-10,093,847 -L 1:10,271,252-10,271,452 -o %s",
1,
- Arrays.asList("ef481be9962e21d09847b8a1d4a4ff65"));
+ Arrays.asList("375efb2feb017f01339f680fdffac6cd"));
executeTest("test indels", spec);
}
@@ -85,7 +130,7 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + GATKDataLocation + "dbsnp_129_b36.vcf -L 1:10,023,400-10,023,500 -L 1:10,029,200-10,029,500 -o %s",
1,
- Arrays.asList("8b6cd2e20c381f9819aab2d270f5e641"));
+ Arrays.asList("81e30f0ab92684c496343c8ea51a393e"));
executeTest("test SNPs", spec);
}
@@ -108,7 +153,7 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T FastaAlternateReferenceMaker -R " + b37KGReference + " --use_IUPAC_sample NA12878 -V " + privateTestDir + "NA12878.WGS.b37.chr20.firstMB.vcf -L 20:61050-66380 -o %s",
1,
- Arrays.asList("5feb2a576ff2ed1745a007eaa36448b3"));
+ Arrays.asList("8fd887bca9f3949f2c23c3565f7dcc1b"));
executeTest("test iupac", spec);
}
}
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/fasta/FastaReferenceMaker.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/fasta/FastaReferenceMaker.java
index 08ab3019a..9df4579e2 100644
--- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/fasta/FastaReferenceMaker.java
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/fasta/FastaReferenceMaker.java
@@ -55,7 +55,15 @@ import java.io.PrintStream;
*
* Output
*
- * A fasta file representing the requested intervals.
+ * A fasta file representing the requested intervals. Each interval has a description line starting with a greater-than (">") symbol, followed by the sequence data.
+ * The description begins with the contig name followed by the beginning position on the contig. Contiguous intervals on the same contig are merged into a single record.
+ *
+ * For example, the fasta file for contig 1 and intervals 1:3-1:4 and 1:6-1:9 is:
+ * >1 1:3
+ * AT
+ * >1 1:6
+ * GGGG
+ *
*
*
* Usage example
@@ -104,18 +112,20 @@ public class FastaReferenceMaker extends RefWalker, Geno
// if there is no interval to the left, then this is the first one
if ( sum == null ) {
sum = value.first;
+ fasta.setName(fasta.getName() + " " + sum.toString());
fasta.append(value.second);
}
- // if the intervals don't overlap, print out the leftmost one and start a new one
+ // if the intervals are not contiguous (there is a gap between them, or they lie on different contigs), print out the leftmost one and start a new one
// (end of contig or new interval)
- else if ( value.first.getStart() != sum.getStop() + 1 ) {
+ else if ( value.first.getStart() != sum.getStop() + 1 || ! value.first.getContig().equals(sum.getContig()) ) {
fasta.flush();
sum = value.first;
+ fasta.setName(fasta.getName() + " " + sum.toString());
fasta.append(value.second);
}
// otherwise, merge them
else {
- sum = getToolkit().getGenomeLocParser().setStop(sum, value.first.getStop());
+ sum = sum.setStop(sum, value.first.getStop());
fasta.append(value.second);
}
return sum;