Following up on Mark's recent commit: this hooks up the --maxPositionalMoveAllowed argument in the indel realigner and passes it through to the SAM writer. We now ensure that no read is moved more than N bases during realignment (N is 200 by default, which is far larger than any move that should realistically occur). If anyone ever sees a warning message about this while using the default value, then please let me know, because I need to see it for myself.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5331 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2011-02-28 04:40:54 +00:00
parent 874406352c
commit 05fac8583d
6 changed files with 38 additions and 23 deletions

View File

@ -30,5 +30,6 @@ public interface StingSAMFileWriter extends SAMFileWriter {
public void setMaxRecordsInRam(int maxRecordsInRam); public void setMaxRecordsInRam(int maxRecordsInRam);
public void setMaxInsertSizeForMovingReadPairs(int maxInsertSizeForMovingReadPairs); public void setMaxInsertSizeForMovingReadPairs(int maxInsertSizeForMovingReadPairs);
public void setMaxPositionalMoveAllowed(int maxPositionalMoveAllowed);
public void setUseConstrainedFileWriter(boolean useConstrainedFileWriter); public void setUseConstrainedFileWriter(boolean useConstrainedFileWriter);
} }

View File

@ -82,7 +82,7 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
throw new UserException("Unable to write to SAM file; neither a target file nor a stream has been specified"); throw new UserException("Unable to write to SAM file; neither a target file nor a stream has been specified");
if ( stub.useConstrainedFileWriter() ) { if ( stub.useConstrainedFileWriter() ) {
this.writer = new ConstrainedMateFixingSAMFileWriter(writer, stub.getMaxInsertSizeForMovingReadPairs()); this.writer = new ConstrainedMateFixingSAMFileWriter(writer, stub.getMaxInsertSizeForMovingReadPairs(), stub.getMaxPositionalMoveAllowed());
} }
} }

View File

@ -101,6 +101,14 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
*/ */
private boolean writeStarted = false; private boolean writeStarted = false;
/**
* Arguments used for the constrained mate pair fixing file writer
*/
private boolean useConstrainedFileWriter = false;
private int maxInsertSizeForMovingReadPairs = -1;
private int maxPositionalMoveAllowed = -1;
/** /**
* HMM for BAQ, if needed * HMM for BAQ, if needed
*/ */
@ -258,12 +266,6 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
outputTracker.getStorage(this).close(); outputTracker.getStorage(this).close();
} }
//
// Experimental arguments for the constrained SAMFileWriter
//
private boolean useConstrainedFileWriter = false;
private int maxInsertSizeForMovingReadPairs = -1;
public int getMaxInsertSizeForMovingReadPairs() { public int getMaxInsertSizeForMovingReadPairs() {
return maxInsertSizeForMovingReadPairs; return maxInsertSizeForMovingReadPairs;
} }
@ -272,6 +274,14 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
this.maxInsertSizeForMovingReadPairs = maxInsertSizeForMovingReadPairs; this.maxInsertSizeForMovingReadPairs = maxInsertSizeForMovingReadPairs;
} }
/**
 * Returns the maximum positional move currently configured on this stub.
 *
 * @return the stored maxPositionalMoveAllowed value; the field is initialized
 *         to -1 until {@link #setMaxPositionalMoveAllowed(int)} is called
 */
public int getMaxPositionalMoveAllowed() {
return maxPositionalMoveAllowed;
}
/**
 * Stores the maximum positional move on this stub so it can later be read back
 * through the matching getter.
 *
 * @param maxPositionalMoveAllowed the value to store; no validation is performed here
 */
public void setMaxPositionalMoveAllowed(int maxPositionalMoveAllowed) {
this.maxPositionalMoveAllowed = maxPositionalMoveAllowed;
}
public boolean useConstrainedFileWriter() { public boolean useConstrainedFileWriter() {
return useConstrainedFileWriter; return useConstrainedFileWriter;
} }

View File

@ -101,6 +101,9 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
@Argument(fullName="maxIsizeForMovement", shortName="maxIsize", doc="maximum insert size of read pairs that we attempt to realign", required=false) @Argument(fullName="maxIsizeForMovement", shortName="maxIsize", doc="maximum insert size of read pairs that we attempt to realign", required=false)
protected int MAX_ISIZE_FOR_MOVEMENT = 3000; protected int MAX_ISIZE_FOR_MOVEMENT = 3000;
@Argument(fullName="maxPositionalMoveAllowed", shortName="maxPosMove", doc="maximum positional move in basepairs that a read can be adjusted during realignment", required=false)
protected int MAX_POS_MOVE_ALLOWED = 200;
@Argument(fullName="maxConsensuses", shortName="maxConsensuses", doc="max alternate consensuses to try (necessary to improve performance in deep coverage)", required=false) @Argument(fullName="maxConsensuses", shortName="maxConsensuses", doc="max alternate consensuses to try (necessary to improve performance in deep coverage)", required=false)
protected int MAX_CONSENSUSES = 30; protected int MAX_CONSENSUSES = 30;
@ -378,6 +381,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
writer.setUseConstrainedFileWriter(true); writer.setUseConstrainedFileWriter(true);
writer.setMaxInsertSizeForMovingReadPairs(MAX_ISIZE_FOR_MOVEMENT); writer.setMaxInsertSizeForMovingReadPairs(MAX_ISIZE_FOR_MOVEMENT);
writer.setMaxPositionalMoveAllowed(MAX_POS_MOVE_ALLOWED);
} }
private void emit(final SAMRecord read) { private void emit(final SAMRecord read) {
@ -406,7 +410,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
private void emitReadLists() { private void emitReadLists() {
// pre-merge lists with priority queue for constrained SAMFileWriter // pre-merge lists with priority queue for constrained SAMFileWriter
logger.warn("EMIT currentInterval " + currentInterval); //logger.warn("EMIT currentInterval " + currentInterval);
readsNotToClean.addAll(readsToClean.getReads()); readsNotToClean.addAll(readsToClean.getReads());
emit(ReadUtils.coordinateSortReads(readsNotToClean)); emit(ReadUtils.coordinateSortReads(readsNotToClean));
readsToClean.clear(); readsToClean.clear();
@ -1430,6 +1434,10 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
return false; return false;
if ( newStart == -1 ) if ( newStart == -1 )
newStart = read.getAlignmentStart(); newStart = read.getAlignmentStart();
else if ( Math.abs(newStart - read.getAlignmentStart()) > MAX_POS_MOVE_ALLOWED ) {
logger.warn(String.format("Attempting to realign read %s at %d more than %d bases to %d.", read.getReadName(), read.getAlignmentStart(), MAX_POS_MOVE_ALLOWED, newStart));
return false;
}
// annotate the record with the original cigar (and optionally the alignment start) // annotate the record with the original cigar (and optionally the alignment start)
if ( !NO_ORIGINAL_ALIGNMENT_TAGS ) { if ( !NO_ORIGINAL_ALIGNMENT_TAGS ) {

View File

@ -75,17 +75,16 @@ import java.util.*;
* @version 0.1 * @version 0.1
*/ */
public class ConstrainedMateFixingSAMFileWriter implements SAMFileWriter { public class ConstrainedMateFixingSAMFileWriter implements SAMFileWriter {
final protected static Logger logger = Logger.getLogger(ConstrainedMateFixingSAMFileWriter.class); protected static final Logger logger = Logger.getLogger(ConstrainedMateFixingSAMFileWriter.class);
private final static boolean DEBUG = false; private static final boolean DEBUG = false;
/** How often do we check whether we want to emit reads? */ /** How often do we check whether we want to emit reads? */
private final static int EMIT_FREQUENCY = 1000; private final static int EMIT_FREQUENCY = 1000;
/** /**
* How much could a single read move in position from its original position? * How much could a single read move in position from its original position?
* todo -- this really should be a provided parameter
*/ */
private final static int MAX_POS_MOVE_ALLOWED = 200; private int MAX_POS_MOVE_ALLOWED;
/** how we order our SAM records */ /** how we order our SAM records */
private final SAMRecordComparator comparer = new SAMRecordCoordinateComparator(); private final SAMRecordComparator comparer = new SAMRecordCoordinateComparator();
@ -122,15 +121,19 @@ public class ConstrainedMateFixingSAMFileWriter implements SAMFileWriter {
public ConstrainedMateFixingSAMFileWriter(final SAMFileHeader header, public ConstrainedMateFixingSAMFileWriter(final SAMFileHeader header,
final File outputFile, final File outputFile,
final int compressionLevel, final int compressionLevel,
final int maxInsertSizeForMovingReadPairs) { final int maxInsertSizeForMovingReadPairs,
final int maxMoveAllowed) {
this(new SAMFileWriterFactory().makeBAMWriter(header, true, outputFile, compressionLevel), this(new SAMFileWriterFactory().makeBAMWriter(header, true, outputFile, compressionLevel),
maxInsertSizeForMovingReadPairs); maxInsertSizeForMovingReadPairs,
maxMoveAllowed);
} }
public ConstrainedMateFixingSAMFileWriter(final SAMFileWriter finalDestination, public ConstrainedMateFixingSAMFileWriter(final SAMFileWriter finalDestination,
final int maxInsertSizeForMovingReadPairs) { final int maxInsertSizeForMovingReadPairs,
final int maxmoveAllowed) {
this.finalDestination = finalDestination; this.finalDestination = finalDestination;
this.maxInsertSizeForMovingReadPairs = maxInsertSizeForMovingReadPairs; this.maxInsertSizeForMovingReadPairs = maxInsertSizeForMovingReadPairs;
this.MAX_POS_MOVE_ALLOWED = maxmoveAllowed;
//timer.start(); //timer.start();
//lastProgressPrintTime = timer.currentTime(); //lastProgressPrintTime = timer.currentTime();

View File

@ -1,16 +1,12 @@
package org.broadinstitute.sting.utils.sam; package org.broadinstitute.sting.utils.sam;
import com.sun.xml.internal.messaging.saaj.packaging.mime.util.OutputUtil;
import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.picard.sam.SamFileValidator; import net.sf.picard.sam.SamFileValidator;
import net.sf.samtools.*; import net.sf.samtools.*;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.BeforeMethod; import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import java.io.File; import java.io.File;
@ -21,9 +17,6 @@ import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
/* /*
* Copyright (c) 2009 The Broad Institute * Copyright (c) 2009 The Broad Institute
@ -86,7 +79,7 @@ public class ConstrainedMateFixingSAMFileWriterUnitTest extends BaseTest {
} }
private ConstrainedMateFixingSAMFileWriter makeWriter(final int maxInsertSizeForMovingReadPairs) { private ConstrainedMateFixingSAMFileWriter makeWriter(final int maxInsertSizeForMovingReadPairs) {
return new ConstrainedMateFixingSAMFileWriter(bamIn.getFileHeader(), OUTPUT_FILE, 5, maxInsertSizeForMovingReadPairs); return new ConstrainedMateFixingSAMFileWriter(bamIn.getFileHeader(), OUTPUT_FILE, 5, maxInsertSizeForMovingReadPairs, 200);
} }
private List<SAMRecord> readBAM(File file) { private List<SAMRecord> readBAM(File file) {