Merge pull request #1040 from broadinstitute/rhl_fasta_ref_maker
Merge contiguous intervals properly, closes #1035
This commit is contained in:
commit
a4dde8f500
|
|
@ -59,23 +59,68 @@ import java.util.Arrays;
|
|||
|
||||
public class FastaAlternateReferenceIntegrationTest extends WalkerTest {
|
||||
|
||||
private static String CONTIGUOUS_INTERVAL_SAME_CONTIG_MD5 = "e1f4b93f9071d158d94dc4fb25e07702";
|
||||
private static String CONTIGUOUS_INTERVAL_DIFF_CONTIG_MD5 = "dfca4e0b0fe0cb18596ec51af541a69e";
|
||||
|
||||
@Test
|
||||
public void testReferenceOnly() {
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T FastaReferenceMaker -R " + b36KGReference + " -L 1:10,000,100-10,000,500 -L 1:10,100,000-10,101,000 -L 1:10,900,000-10,900,001 -o %s",
|
||||
1,
|
||||
Arrays.asList("328d2d52cedfdc52da7d1abff487633d"));
|
||||
Arrays.asList("75d4d352a9ce4fae22fd7924a42c800a"));
|
||||
executeTest("test FastaReference", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReferenceOnlyContiguousSameContig() {
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T FastaReferenceMaker -R " + b36KGReference + " -L 1:10,000,100-10,000,200 -L 1:10,000,201-10,000,301 -o %s",
|
||||
1,
|
||||
Arrays.asList(CONTIGUOUS_INTERVAL_SAME_CONTIG_MD5));
|
||||
executeTest("test FastaReference with contiguous intervals, same contig", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReferenceOnlyContiguousDiffContigs() {
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T FastaReferenceMaker -R " + b36KGReference + " -L 1:10,000,100-10,000,200 -L 2:10,000,201-10,000,301 -o %s",
|
||||
1,
|
||||
Arrays.asList(CONTIGUOUS_INTERVAL_DIFF_CONTIG_MD5));
|
||||
executeTest("test FastaReference with contiguous intervals, different contigs", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAlternateReferenceContiguousSameContig() {
|
||||
// Show that FastaAlternateReferenceMaker behaves the same as FastaReferenceMaker across contiguous intervals on the same contig.
|
||||
// Note that there are variant locations in this interval.
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -L 1:10,000,100-10,000,200 -L 1:10,000,201-10,000,301 -o %s",
|
||||
1,
|
||||
Arrays.asList(CONTIGUOUS_INTERVAL_SAME_CONTIG_MD5));
|
||||
executeTest("test Alternate FastaReference with contiguous intervals, same contig", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAlternateReferenceContiguousDiffContigs() {
|
||||
// Show that FastaAlternateReferenceMaker behaves the same as FastaReferenceMaker across contiguous intervals on different contigs.
|
||||
// Note that there are variant locations in this interval.
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -L 1:10,000,100-10,000,200 -L 2:10,000,201-10,000,301 -o %s",
|
||||
1,
|
||||
Arrays.asList(CONTIGUOUS_INTERVAL_DIFF_CONTIG_MD5));
|
||||
executeTest("test Alternate FastaReference with contiguous intervals, different contigs", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIndelsAndSnpMask() {
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380 -L 1:10,093,447-10,093,847 -L 1:10,271,252-10,271,452 -o %s",
|
||||
1,
|
||||
Arrays.asList("ef481be9962e21d09847b8a1d4a4ff65"));
|
||||
Arrays.asList("375efb2feb017f01339f680fdffac6cd"));
|
||||
executeTest("test indels", spec);
|
||||
}
|
||||
|
||||
|
|
@ -85,7 +130,7 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + GATKDataLocation + "dbsnp_129_b36.vcf -L 1:10,023,400-10,023,500 -L 1:10,029,200-10,029,500 -o %s",
|
||||
1,
|
||||
Arrays.asList("8b6cd2e20c381f9819aab2d270f5e641"));
|
||||
Arrays.asList("81e30f0ab92684c496343c8ea51a393e"));
|
||||
executeTest("test SNPs", spec);
|
||||
}
|
||||
|
||||
|
|
@ -108,7 +153,7 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T FastaAlternateReferenceMaker -R " + b37KGReference + " --use_IUPAC_sample NA12878 -V " + privateTestDir + "NA12878.WGS.b37.chr20.firstMB.vcf -L 20:61050-66380 -o %s",
|
||||
1,
|
||||
Arrays.asList("5feb2a576ff2ed1745a007eaa36448b3"));
|
||||
Arrays.asList("8fd887bca9f3949f2c23c3565f7dcc1b"));
|
||||
executeTest("test iupac", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -55,7 +55,15 @@ import java.io.PrintStream;
|
|||
*
|
||||
* <h3>Output</h3>
|
||||
* <p>
|
||||
* A fasta file representing the requested intervals.
|
||||
* A fasta file representing the requested intervals. Each interval is written as a description line, which starts with a greater-than (">") symbol, followed by that interval's sequence data.
|
||||
* The description begins with the contig name followed by the beginning position on the contig.
|
||||
* <pre>
|
||||
* For example, the fasta file for contig 1 and intervals 1:3-4 and 1:6-9 would be:
|
||||
* >1 1:3
|
||||
* AT
|
||||
* >1 1:6
|
||||
* GGGG
|
||||
* </pre>
|
||||
* </p>
|
||||
*
|
||||
* <h3>Usage example</h3>
|
||||
|
|
@ -104,18 +112,20 @@ public class FastaReferenceMaker extends RefWalker<Pair<GenomeLoc, String>, Geno
|
|||
// if there is no interval to the left, then this is the first one
|
||||
if ( sum == null ) {
|
||||
sum = value.first;
|
||||
fasta.setName(fasta.getName() + " " + sum.toString());
|
||||
fasta.append(value.second);
|
||||
}
|
||||
// if the intervals don't overlap, print out the leftmost one and start a new one
|
||||
// if the intervals are not contiguous, print out the leftmost one and start a new one
|
||||
// (end of contig or new interval)
|
||||
else if ( value.first.getStart() != sum.getStop() + 1 ) {
|
||||
else if ( value.first.getStart() != sum.getStop() + 1 || ! value.first.getContig().equals(sum.getContig()) ) {
|
||||
fasta.flush();
|
||||
sum = value.first;
|
||||
fasta.setName(fasta.getName() + " " + sum.toString());
|
||||
fasta.append(value.second);
|
||||
}
|
||||
// otherwise, merge them
|
||||
else {
|
||||
sum = getToolkit().getGenomeLocParser().setStop(sum, value.first.getStop());
|
||||
sum = sum.setStop(sum, value.first.getStop());
|
||||
fasta.append(value.second);
|
||||
}
|
||||
return sum;
|
||||
|
|
|
|||
Loading…
Reference in New Issue