The cleaner now adds the OC (original cigar) and OS (original alignment start) tags as appropriate to reads that get realigned; this feature can be turned off. Also, improved integration tests (sorry, Kiran!).

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3657 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-06-28 16:46:47 +00:00
parent cc8d8eaedb
commit 1292c96e29
2 changed files with 29 additions and 13 deletions

View File

@ -53,6 +53,9 @@ import java.util.*;
*/
public class IndelRealigner extends ReadWalker<Integer, Integer> {
public static final String ORIGINAL_CIGAR_TAG = "OC";
public static final String ORIGINAL_START_TAG = "OS";
@Argument(fullName="targetIntervals", shortName="targetIntervals", doc="intervals file output from RealignerTargetCreator", required=true)
protected String intervalsFile = null;
@ -102,6 +105,9 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
@Argument(fullName="no_pg_tag", shortName="noPG", required=false, doc="Don't output the usual PG tag in the realigned bam file header. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
protected boolean NO_PG_TAG = false;
@Argument(fullName="no_original_alignment_tags", shortName="noTags", required=false, doc="Don't output the original cigar or alignment start tags for each realigned read in the output bam.")
protected boolean NO_ORIGINAL_ALIGNMENT_TAGS = false;
@Argument(fullName="targetIntervalsAreNotSorted", shortName="targetNotSorted", required=false, doc="This tool assumes that the target interval list is sorted; if the list turns out to be unsorted, it will throw an exception. Use this argument when your interval list is not sorted to instruct the Realigner to first sort it in memory.")
protected boolean TARGET_NOT_SORTED = false;
@ -635,7 +641,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
} else if ( statsOutput != null ) {
try {
statsOutput.write(String.format("%s\tFAIL\t%.1f\t%d%n",
statsOutput.write(String.format("%s\tFAIL\t%.1f%n",
readsToClean.getLocation().toString(), improvement));
statsOutput.flush();
} catch (Exception e) {}
@ -1125,6 +1131,13 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
if ( newStart == -1 )
newStart = read.getAlignmentStart();
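// annotate the record with its original cigar and, only when realignment moves the read, its original alignment start
// (this must happen before the read's cigar and alignment start are overwritten below)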
if ( !NO_ORIGINAL_ALIGNMENT_TAGS ) {
read.setAttribute(ORIGINAL_CIGAR_TAG, read.getCigar().toString());
if ( newStart != read.getAlignmentStart() )
read.setAttribute(ORIGINAL_START_TAG, read.getAlignmentStart());
}
// if it's a paired end read, we need to update the insert size
if ( read.getReadPairedFlag() ) {
int insertSize = read.getInferredInsertSize();
@ -1133,7 +1146,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
read.setInferredInsertSize(insertSize + read.getAlignmentStart() - newStart);
read.setAlignmentStart(newStart);
} else {
// note that the correct order of actions is crucial here
// note that the correct order of actions is crucial here (we can't set the new cigar too early)
int oldEnd = read.getAlignmentEnd();
read.setCigar(newCigar);
read.setAlignmentStart(newStart);

View File

@ -6,21 +6,24 @@ import org.junit.Test;
import java.util.Arrays;
public class IndelRealignerIntegrationTest extends WalkerTest {
@Test
public void testRealigner() {
String[] md5lod5 = {"d9cbff4832fc3ee7a7ad1c58cc891bdd", "d4d8ff567b614729ab8c52bd7d6bef48"};
WalkerTestSpec spec1 = new WalkerTestSpec(
"-T IndelRealigner -noPG -LOD 5 -maxConsensuses 100 -greedy 100 -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10023800-10332350 -compress 1 -targetIntervals " + validationDataLocation + "cleaner.test.intervals -O %s -snps %s --sortInCoordinateOrderEvenThoughItIsHighlyUnsafe",
@Test
public void testRealignerLod5() {
String[] md5lod5 = {"56f1fb75cae706a5a6278257ea2f2598", "18fca887d1eb7dc300e717ae03b9da62"};
WalkerTestSpec spec = new WalkerTestSpec(
"-T IndelRealigner -noPG -LOD 5 -maxConsensuses 100 -greedy 100 -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10023000-10030000 -compress 1 -targetIntervals " + validationDataLocation + "cleaner.test.intervals -O %s -stats %s --sortInCoordinateOrderEvenThoughItIsHighlyUnsafe",
2,
Arrays.asList(md5lod5));
executeTest("test Lod5", spec1);
executeTest("test realigner lod5", spec);
}
String[] md5lod200 = {"d9cbff4832fc3ee7a7ad1c58cc891bdd", "d4d8ff567b614729ab8c52bd7d6bef48"};
WalkerTestSpec spec2 = new WalkerTestSpec(
"-T IndelRealigner -noPG -LOD 200 -maxConsensuses 100 -greedy 100 -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10023800-10332350 -compress 1 -targetIntervals " + validationDataLocation + "cleaner.test.intervals -O %s -snps %s --sortInCoordinateOrderEvenThoughItIsHighlyUnsafe",
@Test
public void testRealignerLod50() {
String[] md5lod50 = {"56f1fb75cae706a5a6278257ea2f2598", "9537e4f195ce5840136f60fb61201369"};
WalkerTestSpec spec = new WalkerTestSpec(
"-T IndelRealigner -noPG -LOD 50 -maxConsensuses 100 -greedy 100 -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10023000-10030000 -compress 1 -targetIntervals " + validationDataLocation + "cleaner.test.intervals -O %s -stats %s --sortInCoordinateOrderEvenThoughItIsHighlyUnsafe",
2,
Arrays.asList(md5lod200));
executeTest("test Lod200", spec2);
Arrays.asList(md5lod50));
executeTest("test realigner lod50", spec);
}
}