Addition for Tim: recalculate the NM and UQ tags after realignment. Also, don't fix the insert size calculation, since that's done by fix mate information.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4227 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-09-08 04:02:14 +00:00
parent 84ddadca64
commit 65edbced36
2 changed files with 17 additions and 25 deletions

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.indels;
import net.sf.samtools.*;
import net.sf.samtools.util.StringUtil;
import net.sf.samtools.util.SequenceUtil;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.commandline.*;
@ -83,7 +84,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
@Argument(fullName="maxReadsInRam", shortName="maxInRam", doc="max reads allowed to be kept in memory at a time by the SAMFileWriter. "+
"If too low, the tool may run out of system file descriptors needed to perform sorting; if too high, the tool may run out of memory.", required=false)
protected int MAX_RECORDS_IN_RAM = 500000;
// ADVANCED OPTIONS FOLLOW
@Argument(fullName="maxConsensuses", shortName="maxConsensuses", doc="max alternate consensuses to try (necessary to improve performance in deep coverage)", required=false)
@ -583,8 +584,14 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
// however we don't have enough info to use the proper MAQ scoring system.
// For now, we will just arbitrarily add 10 to the mapping quality. [EB, 6/7/2010].
// TODO -- we need a better solution here
aRead.getRead().setMappingQuality(Math.min(aRead.getRead().getMappingQuality() + 10, 254));
aRead.getRead().setAttribute("NM", AlignmentUtils.numMismatches(aRead.getRead(), reference, aRead.getRead().getAlignmentStart()-(int)leftmostIndex));
SAMRecord read = aRead.getRead();
read.setMappingQuality(Math.min(aRead.getRead().getMappingQuality() + 10, 254));
// fix the attribute tags
if ( read.getAttribute(SAMTag.NM.name()) != null )
read.setAttribute("NM", SequenceUtil.calculateSamNmTag(read, reference, (int)leftmostIndex-1));
if ( read.getAttribute(SAMTag.UQ.name()) != null )
read.setAttribute("UQ", SequenceUtil.sumQualitiesOfMismatches(read, reference, (int)leftmostIndex-1));
}
}
}
@ -1210,25 +1217,10 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
if ( newStart != read.getAlignmentStart() )
read.setAttribute(ORIGINAL_POSITION_TAG, read.getAlignmentStart());
}
// if it's a paired end read, we need to update the insert size
if ( read.getReadPairedFlag() ) {
int insertSize = read.getInferredInsertSize();
if ( insertSize > 0 ) {
read.setCigar(newCigar);
read.setInferredInsertSize(insertSize + read.getAlignmentStart() - newStart);
read.setAlignmentStart(newStart);
} else {
// note that the correct order of actions is crucial here (we can't set the new cigar too early)
int oldEnd = read.getAlignmentEnd();
read.setCigar(newCigar);
read.setAlignmentStart(newStart);
read.setInferredInsertSize(insertSize + oldEnd - read.getAlignmentEnd());
}
} else {
read.setCigar(newCigar);
read.setAlignmentStart(newStart);
}
read.setCigar(newCigar);
read.setAlignmentStart(newStart);
return true;
}

View File

@ -9,7 +9,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
@Test
public void testRealignerLod5() {
String[] md5s = {"ecff0ce35feb839b6346197db455bbdf", "c4ef635f2597b12b93a73199f07e509b"};
String[] md5s = {"2265e03ed1a2290e60208559925c30ef", "c4ef635f2597b12b93a73199f07e509b"};
WalkerTestSpec spec = new WalkerTestSpec(
"-T IndelRealigner -noPG -LOD 5 -maxConsensuses 100 -greedy 100 -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10023000-10030000 -compress 1 -targetIntervals " + validationDataLocation + "cleaner.test.intervals -o %s -stats %s --sortInCoordinateOrderEvenThoughItIsHighlyUnsafe",
2,
@ -19,7 +19,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
@Test
public void testRealignerLod50() {
String[] md5s = {"ecff0ce35feb839b6346197db455bbdf", "3735a510513b6fa4161d92155e026283"};
String[] md5s = {"2265e03ed1a2290e60208559925c30ef", "3735a510513b6fa4161d92155e026283"};
WalkerTestSpec spec = new WalkerTestSpec(
"-T IndelRealigner -noPG -LOD 50 -maxConsensuses 100 -greedy 100 -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10023000-10030000 -compress 1 -targetIntervals " + validationDataLocation + "cleaner.test.intervals -o %s -stats %s --sortInCoordinateOrderEvenThoughItIsHighlyUnsafe",
2,
@ -29,7 +29,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
@Test
public void testRealignerKnownsOnly() {
String[] md5s = {"70b657dcb654bd2c84c38d654c5af697", "74652bd8240291293ec921f8ecfa1622"};
String[] md5s = {"dc2fe2856840b12a34bb57de2ca71cd0", "74652bd8240291293ec921f8ecfa1622"};
WalkerTestSpec spec = new WalkerTestSpec(
"-T IndelRealigner -noPG -LOD 1.0 -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10023000-10076000 -compress 1 -targetIntervals " + validationDataLocation + "NA12878.indels.intervals -B:knownIndels,VCF " + validationDataLocation + "NA12878.indels.vcf4 -o %s -stats %s --sortInCoordinateOrderEvenThoughItIsHighlyUnsafe -knownsOnly",
2,