Phase 3: constrained movement is now the only option available in the realigner (so I guess technically it's not really an option). Several command-line options are deprecated. Code cleaned up. Wiki updated. Release coming. One phase left...
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5299 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
93888e570b
commit
ee6f112556
|
|
@ -44,7 +44,6 @@ import org.broadinstitute.sting.gatk.refdata.*;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.BAQMode;
|
import org.broadinstitute.sting.gatk.walkers.BAQMode;
|
||||||
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
|
|
||||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||||
|
|
@ -96,12 +95,12 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
@Argument(fullName="doNotUseSW", shortName="doNotUseSW", required=false, doc="Don't run 'Smith-Waterman' to generate alternate consenses; use only known indels provided as RODs or indels in the reads for constructing the alternate references.")
|
@Argument(fullName="doNotUseSW", shortName="doNotUseSW", required=false, doc="Don't run 'Smith-Waterman' to generate alternate consenses; use only known indels provided as RODs or indels in the reads for constructing the alternate references.")
|
||||||
protected boolean NO_SW = false;
|
protected boolean NO_SW = false;
|
||||||
|
|
||||||
@Argument(fullName="maxReadsInRam", shortName="maxInRam", doc="max reads allowed to be kept in memory at a time by the SAMFileWriter. "+
|
|
||||||
"If too low, the tool may run out of system file descriptors needed to perform sorting; if too high, the tool may run out of memory.", required=false)
|
|
||||||
protected int MAX_RECORDS_IN_RAM = 500000;
|
|
||||||
|
|
||||||
// ADVANCED OPTIONS FOLLOW
|
// ADVANCED OPTIONS FOLLOW
|
||||||
|
|
||||||
|
@Argument(fullName="maxIsizeForMovement", shortName="maxIsize", doc="maximum insert size of read pairs that we attempt to realign", required=false)
|
||||||
|
protected int MAX_ISIZE_FOR_MOVEMENT = 3000;
|
||||||
|
|
||||||
@Argument(fullName="maxConsensuses", shortName="maxConsensuses", doc="max alternate consensuses to try (necessary to improve performance in deep coverage)", required=false)
|
@Argument(fullName="maxConsensuses", shortName="maxConsensuses", doc="max alternate consensuses to try (necessary to improve performance in deep coverage)", required=false)
|
||||||
protected int MAX_CONSENSUSES = 30;
|
protected int MAX_CONSENSUSES = 30;
|
||||||
|
|
||||||
|
|
@ -112,20 +111,6 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
"if this value is exceeded, realignment is not attempted and the reads are passed to the output file(s) as-is", required=false)
|
"if this value is exceeded, realignment is not attempted and the reads are passed to the output file(s) as-is", required=false)
|
||||||
protected int MAX_READS = 20000;
|
protected int MAX_READS = 20000;
|
||||||
|
|
||||||
@Hidden
|
|
||||||
@Argument(fullName="doNotSortEvenThoughItIsHighlyUnsafe", required=false,
|
|
||||||
doc="Should we not sort the final bam at all?")
|
|
||||||
protected boolean DO_NOT_SORT = false;
|
|
||||||
|
|
||||||
@Argument(fullName="sortInCoordinateOrderEvenThoughItIsHighlyUnsafe", required=false,
|
|
||||||
doc="Should we sort the final bam in coordinate order even though it will be malformed because "+
|
|
||||||
"mate pairs of realigned reads will contain inaccurate information?")
|
|
||||||
protected boolean SORT_IN_COORDINATE_ORDER = false;
|
|
||||||
|
|
||||||
@Argument(fullName="realignReadsWithBadMates", required=false,
|
|
||||||
doc="Should we try to realign paired-end reads whose mates map to other chromosomes?")
|
|
||||||
protected boolean REALIGN_BADLY_MATED_READS = false;
|
|
||||||
|
|
||||||
@Argument(fullName="noPGTag", shortName="noPG", required=false,
|
@Argument(fullName="noPGTag", shortName="noPG", required=false,
|
||||||
doc="Don't output the usual PG tag in the realigned bam file header. FOR DEBUGGING PURPOSES ONLY. "+
|
doc="Don't output the usual PG tag in the realigned bam file header. FOR DEBUGGING PURPOSES ONLY. "+
|
||||||
"This option is required in order to pass integration tests.")
|
"This option is required in order to pass integration tests.")
|
||||||
|
|
@ -159,6 +144,21 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
protected boolean CHECKEARLY = false;
|
protected boolean CHECKEARLY = false;
|
||||||
|
|
||||||
|
|
||||||
|
// DEPRECATED
|
||||||
|
|
||||||
|
@Deprecated
|
||||||
|
@Argument(fullName="maxReadsInRam", shortName="maxInRam", doc="This argument is no longer used.", required=false)
|
||||||
|
protected int DEPRECATED_MAX_IN_RAM = 0;
|
||||||
|
|
||||||
|
@Deprecated
|
||||||
|
@Argument(fullName="sortInCoordinateOrderEvenThoughItIsHighlyUnsafe", doc="This argument is no longer used.", required=false)
|
||||||
|
protected boolean DEPRECATED_SORT_IN_COORDINATE_ORDER = false;
|
||||||
|
|
||||||
|
@Deprecated
|
||||||
|
@Argument(fullName="realignReadsWithBadMates", doc="This argument is no longer used.", required=false)
|
||||||
|
protected boolean DEPRECATED_REALIGN_MATES = false;
|
||||||
|
|
||||||
|
|
||||||
// DEBUGGING OPTIONS FOLLOW
|
// DEBUGGING OPTIONS FOLLOW
|
||||||
|
|
||||||
@Hidden
|
@Hidden
|
||||||
|
|
@ -173,16 +173,6 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
@Output(fullName="SNPsFileForDebugging", shortName="snps", doc="print out whether mismatching columns do or don't get cleaned out; FOR DEBUGGING PURPOSES ONLY", required=false)
|
@Output(fullName="SNPsFileForDebugging", shortName="snps", doc="print out whether mismatching columns do or don't get cleaned out; FOR DEBUGGING PURPOSES ONLY", required=false)
|
||||||
protected String OUT_SNPS = null;
|
protected String OUT_SNPS = null;
|
||||||
|
|
||||||
//
|
|
||||||
// Experimental output constraints
|
|
||||||
//
|
|
||||||
// TODO -- eric promised me he'll validate this further and release to the world as the only option to do cleaning
|
|
||||||
//
|
|
||||||
@Hidden
|
|
||||||
@Argument(fullName="constrainMovement", shortName="CM", required=false, doc="If provided, we'll try the experimental constraining output system")
|
|
||||||
protected boolean CONSTRAIN_MOVEMENT = false;
|
|
||||||
protected int MAX_ISIZE_FOR_MOVEMENT = 3000;
|
|
||||||
|
|
||||||
// fasta reference reader to supplement the edges of the reference sequence
|
// fasta reference reader to supplement the edges of the reference sequence
|
||||||
private IndexedFastaSequenceFile referenceReader;
|
private IndexedFastaSequenceFile referenceReader;
|
||||||
|
|
||||||
|
|
@ -297,8 +287,6 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SAMFileWriterImpl.setDefaultMaxRecordsInRam(MAX_RECORDS_IN_RAM);
|
|
||||||
|
|
||||||
for ( SAMReaderID rid : getToolkit().getReadsDataSource().getReaderIDs() ) {
|
for ( SAMReaderID rid : getToolkit().getReadsDataSource().getReaderIDs() ) {
|
||||||
|
|
||||||
String fName = getToolkit().getReadsDataSource().getSAMFile(rid).getName();
|
String fName = getToolkit().getReadsDataSource().getSAMFile(rid).getName();
|
||||||
|
|
@ -323,7 +311,9 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered");
|
throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered");
|
||||||
|
|
||||||
File f = new File(outName);
|
File f = new File(outName);
|
||||||
SAMFileWriter sw = new SAMFileWriterFactory().makeSAMOrBAMWriter(setupHeader(getToolkit().getSAMFileHeader(rid)), false, f);
|
SAMFileHeader header = getToolkit().getSAMFileHeader(rid);
|
||||||
|
header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
|
||||||
|
SAMFileWriter sw = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, false, f);
|
||||||
nwayWriters.put(rid,sw);
|
nwayWriters.put(rid,sw);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -362,19 +352,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private SAMFileHeader setupHeader(SAMFileHeader header) {
|
|
||||||
if ( DO_NOT_SORT )
|
|
||||||
header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
|
|
||||||
else if ( SORT_IN_COORDINATE_ORDER || CONSTRAIN_MOVEMENT )
|
|
||||||
header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
|
|
||||||
else
|
|
||||||
header.setSortOrder(SAMFileHeader.SortOrder.queryname);
|
|
||||||
return header;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void setupWriter(SAMFileHeader header) {
|
private void setupWriter(SAMFileHeader header) {
|
||||||
header = setupHeader(header);
|
|
||||||
|
|
||||||
if ( !NO_PG_TAG ) {
|
if ( !NO_PG_TAG ) {
|
||||||
final SAMProgramRecord programRecord = new SAMProgramRecord(PROGRAM_RECORD_NAME);
|
final SAMProgramRecord programRecord = new SAMProgramRecord(PROGRAM_RECORD_NAME);
|
||||||
final ResourceBundle headerInfo = TextFormattingUtils.loadResourceBundle("StingText");
|
final ResourceBundle headerInfo = TextFormattingUtils.loadResourceBundle("StingText");
|
||||||
|
|
@ -395,13 +374,10 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
writer.writeHeader(header);
|
writer.writeHeader(header);
|
||||||
writer.setPresorted(CONSTRAIN_MOVEMENT);
|
writer.setPresorted(true);
|
||||||
writer.setMaxRecordsInRam(MAX_RECORDS_IN_RAM);
|
|
||||||
|
|
||||||
if ( CONSTRAIN_MOVEMENT ) {
|
writer.setUseConstrainedFileWriter(true);
|
||||||
writer.setUseConstrainedFileWriter(true);
|
writer.setMaxInsertSizeForMovingReadPairs(MAX_ISIZE_FOR_MOVEMENT);
|
||||||
writer.setMaxInsertSizeForMovingReadPairs(MAX_ISIZE_FOR_MOVEMENT);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void emit(final SAMRecord read) {
|
private void emit(final SAMRecord read) {
|
||||||
|
|
@ -489,19 +465,12 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean doNotTryToClean(SAMRecord read) {
|
private boolean doNotTryToClean(SAMRecord read) {
|
||||||
boolean immobileReadForWriting = CONSTRAIN_MOVEMENT && ConstrainedMateFixingSAMFileWriter.iSizeTooBigToMove(read, MAX_ISIZE_FOR_MOVEMENT);
|
return read.getReadUnmappedFlag() ||
|
||||||
|
|
||||||
boolean old = read.getReadUnmappedFlag() ||
|
|
||||||
read.getNotPrimaryAlignmentFlag() ||
|
read.getNotPrimaryAlignmentFlag() ||
|
||||||
read.getReadFailsVendorQualityCheckFlag() ||
|
read.getReadFailsVendorQualityCheckFlag() ||
|
||||||
read.getMappingQuality() == 0 ||
|
read.getMappingQuality() == 0 ||
|
||||||
read.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START ||
|
read.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START ||
|
||||||
(!REALIGN_BADLY_MATED_READS && BadMateFilter.hasBadMate(read));
|
ConstrainedMateFixingSAMFileWriter.iSizeTooBigToMove(read, MAX_ISIZE_FOR_MOVEMENT);
|
||||||
|
|
||||||
// if ( immobileReadForWriting && ! old)
|
|
||||||
// logger.warn("Newly skipping read: " + read + " isize = " + read.getInferredInsertSize());
|
|
||||||
|
|
||||||
return old || immobileReadForWriting;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void cleanAndCallMap(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker, GenomeLoc readLoc) {
|
private void cleanAndCallMap(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker, GenomeLoc readLoc) {
|
||||||
|
|
|
||||||
|
|
@ -73,8 +73,9 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
|
||||||
@Argument(fullName="maxIntervalSize", shortName="maxInterval", doc="maximum interval size", required=false)
|
@Argument(fullName="maxIntervalSize", shortName="maxInterval", doc="maximum interval size", required=false)
|
||||||
protected int maxIntervalSize = 500;
|
protected int maxIntervalSize = 500;
|
||||||
|
|
||||||
@Argument(fullName="realignReadsWithBadMates", required=false, doc="Should we try to realign paired-end reads whose mates map to other chromosomes?")
|
@Deprecated
|
||||||
protected boolean REALIGN_BADLY_MATED_READS = false;
|
@Argument(fullName="realignReadsWithBadMates", doc="This argument is no longer used.", required=false)
|
||||||
|
protected boolean DEPRECATED_REALIGN_MATES = false;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean generateExtendedEvents() { return true; }
|
public boolean generateExtendedEvents() { return true; }
|
||||||
|
|
@ -140,7 +141,7 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
|
||||||
int mismatchQualities = 0, totalQualities = 0;
|
int mismatchQualities = 0, totalQualities = 0;
|
||||||
byte refBase = ref.getBase();
|
byte refBase = ref.getBase();
|
||||||
for (PileupElement p : pileup ) {
|
for (PileupElement p : pileup ) {
|
||||||
if ( !REALIGN_BADLY_MATED_READS && BadMateFilter.hasBadMate(p.getRead()) )
|
if ( BadMateFilter.hasBadMate(p.getRead()) )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// check the ends of the reads to see how far they extend
|
// check the ends of the reads to see how far they extend
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
|
||||||
private static final String mainTestBam = validationDataLocation + "indelRealignerTest.pilot1.ceu.bam";
|
private static final String mainTestBam = validationDataLocation + "indelRealignerTest.pilot1.ceu.bam";
|
||||||
private static final String mainTestIntervals = validationDataLocation + "indelRealignerTest.pilot1.ceu.intervals";
|
private static final String mainTestIntervals = validationDataLocation + "indelRealignerTest.pilot1.ceu.intervals";
|
||||||
private static final String knownIndels = validationDataLocation + "indelRealignerTest.pilot1.ceu.vcf";
|
private static final String knownIndels = validationDataLocation + "indelRealignerTest.pilot1.ceu.vcf";
|
||||||
private static final String baseCommandPrefix = "--constrainMovement -T IndelRealigner -noPG -R " + b36KGReference + " -I " + mainTestBam + " -targetIntervals " + mainTestIntervals + " -compress 0 -L 20:49,500-55,500 --sortInCoordinateOrderEvenThoughItIsHighlyUnsafe ";
|
private static final String baseCommandPrefix = "-T IndelRealigner -noPG -R " + b36KGReference + " -I " + mainTestBam + " -targetIntervals " + mainTestIntervals + " -compress 0 -L 20:49,500-55,500 ";
|
||||||
private static final String baseCommand = baseCommandPrefix + "-o %s ";
|
private static final String baseCommand = baseCommandPrefix + "-o %s ";
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -91,7 +91,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testLongRun() {
|
public void testLongRun() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"--constrainMovement -T IndelRealigner -noPG -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10,000,000-11,000,000 -targetIntervals " + validationDataLocation + "indelRealignerTest.NA12878.chrom1.intervals -compress 0 --sortInCoordinateOrderEvenThoughItIsHighlyUnsafe -o %s",
|
"-T IndelRealigner -noPG -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10,000,000-11,000,000 -targetIntervals " + validationDataLocation + "indelRealignerTest.NA12878.chrom1.intervals -compress 0 -o %s",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("be859f9a98d738becee0526887cae42e"));
|
Arrays.asList("be859f9a98d738becee0526887cae42e"));
|
||||||
executeTest("realigner long run", spec);
|
executeTest("realigner long run", spec);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue