Following up on Mark's recent commit: hooking the --maxPositionalMoveAllowed argument into the indel realigner and through to the SAM writer. We now ensure that no read is moved by more than N bases during realignment (200 by default; a move that large should never realistically happen). If anyone ever sees a warning message about this with the default value, please let me know, because I need to see it for myself.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5331 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2011-02-28 04:40:54 +00:00
parent 874406352c
commit 05fac8583d
6 changed files with 38 additions and 23 deletions

View File

@ -30,5 +30,6 @@ public interface StingSAMFileWriter extends SAMFileWriter {
public void setMaxRecordsInRam(int maxRecordsInRam);
public void setMaxInsertSizeForMovingReadPairs(int maxInsertSizeForMovingReadPairs);
public void setMaxPositionalMoveAllowed(int maxPositionalMoveAllowed);
public void setUseConstrainedFileWriter(boolean useConstrainedFileWriter);
}

View File

@ -82,7 +82,7 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
throw new UserException("Unable to write to SAM file; neither a target file nor a stream has been specified");
if ( stub.useConstrainedFileWriter() ) {
this.writer = new ConstrainedMateFixingSAMFileWriter(writer, stub.getMaxInsertSizeForMovingReadPairs());
this.writer = new ConstrainedMateFixingSAMFileWriter(writer, stub.getMaxInsertSizeForMovingReadPairs(), stub.getMaxPositionalMoveAllowed());
}
}

View File

@ -101,6 +101,14 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
*/
private boolean writeStarted = false;
/**
* Arguments used for the constrained mate pair fixing file writer
*/
private boolean useConstrainedFileWriter = false;
private int maxInsertSizeForMovingReadPairs = -1;
private int maxPositionalMoveAllowed = -1;
/**
* HMM for BAQ, if needed
*/
@ -258,12 +266,6 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
outputTracker.getStorage(this).close();
}
//
// Experimental arguments for the constrained SAMFileWriter
//
private boolean useConstrainedFileWriter = false;
private int maxInsertSizeForMovingReadPairs = -1;
/**
* Returns the maximum insert size for moving read pairs configured on this stub.
* The backing field starts at -1 until a setter call replaces it.
*
* @return the current value of maxInsertSizeForMovingReadPairs
*/
public int getMaxInsertSizeForMovingReadPairs() {
return maxInsertSizeForMovingReadPairs;
}
@ -272,6 +274,14 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
this.maxInsertSizeForMovingReadPairs = maxInsertSizeForMovingReadPairs;
}
/**
* Returns the maximum positional move (in bases) allowed for a read, as configured on this stub.
* The backing field starts at -1 until setMaxPositionalMoveAllowed is called.
*
* @return the current value of maxPositionalMoveAllowed
*/
public int getMaxPositionalMoveAllowed() {
return maxPositionalMoveAllowed;
}
/**
* Stores the maximum positional move allowed for a read; the value is handed back
* unchanged by getMaxPositionalMoveAllowed. No validation is performed here.
*
* @param maxPositionalMoveAllowed the limit to record on this stub
*/
public void setMaxPositionalMoveAllowed(int maxPositionalMoveAllowed) {
this.maxPositionalMoveAllowed = maxPositionalMoveAllowed;
}
/**
* Reports whether the constrained mate-fixing file writer has been requested for this stub.
* The flag is false unless it has been explicitly set.
*
* @return the current value of useConstrainedFileWriter
*/
public boolean useConstrainedFileWriter() {
return useConstrainedFileWriter;
}

View File

@ -101,6 +101,9 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
@Argument(fullName="maxIsizeForMovement", shortName="maxIsize", doc="maximum insert size of read pairs that we attempt to realign", required=false)
protected int MAX_ISIZE_FOR_MOVEMENT = 3000;
@Argument(fullName="maxPositionalMoveAllowed", shortName="maxPosMove", doc="maximum positional move in basepairs that a read can be adjusted during realignment", required=false)
protected int MAX_POS_MOVE_ALLOWED = 200;
@Argument(fullName="maxConsensuses", shortName="maxConsensuses", doc="max alternate consensuses to try (necessary to improve performance in deep coverage)", required=false)
protected int MAX_CONSENSUSES = 30;
@ -378,6 +381,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
writer.setUseConstrainedFileWriter(true);
writer.setMaxInsertSizeForMovingReadPairs(MAX_ISIZE_FOR_MOVEMENT);
writer.setMaxPositionalMoveAllowed(MAX_POS_MOVE_ALLOWED);
}
private void emit(final SAMRecord read) {
@ -406,7 +410,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
private void emitReadLists() {
// pre-merge lists with priority queue for constrained SAMFileWriter
logger.warn("EMIT currentInterval " + currentInterval);
//logger.warn("EMIT currentInterval " + currentInterval);
readsNotToClean.addAll(readsToClean.getReads());
emit(ReadUtils.coordinateSortReads(readsNotToClean));
readsToClean.clear();
@ -1430,6 +1434,10 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
return false;
if ( newStart == -1 )
newStart = read.getAlignmentStart();
else if ( Math.abs(newStart - read.getAlignmentStart()) > MAX_POS_MOVE_ALLOWED ) {
logger.warn(String.format("Attempting to realign read %s at %d more than %d bases to %d.", read.getReadName(), read.getAlignmentStart(), MAX_POS_MOVE_ALLOWED, newStart));
return false;
}
// annotate the record with the original cigar (and optionally the alignment start)
if ( !NO_ORIGINAL_ALIGNMENT_TAGS ) {

View File

@ -75,17 +75,16 @@ import java.util.*;
* @version 0.1
*/
public class ConstrainedMateFixingSAMFileWriter implements SAMFileWriter {
final protected static Logger logger = Logger.getLogger(ConstrainedMateFixingSAMFileWriter.class);
private final static boolean DEBUG = false;
protected static final Logger logger = Logger.getLogger(ConstrainedMateFixingSAMFileWriter.class);
private static final boolean DEBUG = false;
/** How often do we check whether we want to emit reads? */
private final static int EMIT_FREQUENCY = 1000;
/**
* How much could a single read move in position from its original position?
* This limit is supplied by the caller through the constructor.
*/
private final static int MAX_POS_MOVE_ALLOWED = 200;
private int MAX_POS_MOVE_ALLOWED;
/** how we order our SAM records */
private final SAMRecordComparator comparer = new SAMRecordCoordinateComparator();
@ -122,15 +121,19 @@ public class ConstrainedMateFixingSAMFileWriter implements SAMFileWriter {
public ConstrainedMateFixingSAMFileWriter(final SAMFileHeader header,
final File outputFile,
final int compressionLevel,
final int maxInsertSizeForMovingReadPairs) {
final int maxInsertSizeForMovingReadPairs,
final int maxMoveAllowed) {
this(new SAMFileWriterFactory().makeBAMWriter(header, true, outputFile, compressionLevel),
maxInsertSizeForMovingReadPairs);
maxInsertSizeForMovingReadPairs,
maxMoveAllowed);
}
public ConstrainedMateFixingSAMFileWriter(final SAMFileWriter finalDestination,
final int maxInsertSizeForMovingReadPairs) {
final int maxInsertSizeForMovingReadPairs,
final int maxmoveAllowed) {
this.finalDestination = finalDestination;
this.maxInsertSizeForMovingReadPairs = maxInsertSizeForMovingReadPairs;
this.MAX_POS_MOVE_ALLOWED = maxmoveAllowed;
//timer.start();
//lastProgressPrintTime = timer.currentTime();

View File

@ -1,16 +1,12 @@
package org.broadinstitute.sting.utils.sam;
import com.sun.xml.internal.messaging.saaj.packaging.mime.util.OutputUtil;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.picard.sam.SamFileValidator;
import net.sf.samtools.*;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import java.io.File;
@ -21,9 +17,6 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
/*
* Copyright (c) 2009 The Broad Institute
@ -86,7 +79,7 @@ public class ConstrainedMateFixingSAMFileWriterUnitTest extends BaseTest {
}
/**
* Builds the writer under test on top of OUTPUT_FILE, using the header of bamIn and
* compression level 5.
*
* @param maxInsertSizeForMovingReadPairs forwarded unchanged to the writer's constructor
* @return a newly constructed ConstrainedMateFixingSAMFileWriter
*/
private ConstrainedMateFixingSAMFileWriter makeWriter(final int maxInsertSizeForMovingReadPairs) {
return new ConstrainedMateFixingSAMFileWriter(bamIn.getFileHeader(), OUTPUT_FILE, 5, maxInsertSizeForMovingReadPairs);
return new ConstrainedMateFixingSAMFileWriter(bamIn.getFileHeader(), OUTPUT_FILE, 5, maxInsertSizeForMovingReadPairs, 200);
}
private List<SAMRecord> readBAM(File file) {