Added the old recalibrator integration tests to the refactored recalibrator sitting in playground.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2096 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
a95302fe98
commit
22aaf8c5e0
|
|
@ -0,0 +1,15 @@
|
|||
package org.broadinstitute.sting.gatk.filters;
|
||||
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Nov 19, 2009
|
||||
*/
|
||||
public class NoOriginalQualityScoresFilter implements SamRecordFilter {
|
||||
public boolean filterOut(SAMRecord rec) {
|
||||
return (rec.getAttribute("OQ") == null);
|
||||
}
|
||||
}
|
||||
|
|
@ -514,6 +514,9 @@ class SerialRecalMapping implements RecalMapping {
|
|||
double newQual = qual + globalDeltaQ + deltaQual + deltaQPos + deltaQDinuc;
|
||||
newQualByte = QualityUtils.boundQual((int)Math.round(newQual), QualityUtils.MAX_REASONABLE_Q_SCORE);
|
||||
|
||||
//System.out.println(String.format("%s %c%c %d %d => %d + %.2f + %.2f + %.2f + %.2f = %d",
|
||||
// readGroup, prevBase, base, cycle, qual, qual, globalDeltaQ, deltaQual, deltaQPos, deltaQDinuc, newQualByte));
|
||||
|
||||
if ( newQualByte <= 0 || newQualByte > QualityUtils.MAX_REASONABLE_Q_SCORE ) // was '&&', which can never be true
|
||||
throw new RuntimeException(String.format("Illegal base quality score calculated: %s %c%c %d %d => %d + %.2f + %.2f + %.2f = %d",
|
||||
readGroup, prevBase, base, cycle, qual, qual, globalDeltaQ, deltaQPos, deltaQDinuc, newQualByte));
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ import net.sf.samtools.SAMReadGroupRecord;
|
|||
|
||||
@By( DataSource.READS ) // Only look at covered loci, not every loci of the reference file
|
||||
@WalkerName( "CountCovariatesRefactored" )
|
||||
@ReadFilters( {ZeroMappingQualityReadFilter.class} ) // Filter out all reads with zero mapping quality
|
||||
//@ReadFilters( {ZeroMappingQualityReadFilter.class} ) // Filter out all reads with zero mapping quality // BUGBUG taken out to match old integration tests
|
||||
@Requires( {DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES} ) // This walker requires both -I input.bam and -R reference.fasta
|
||||
public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
||||
|
||||
|
|
@ -80,6 +80,11 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
private String RECAL_FILE = "output.recal_data.csv";
|
||||
@Argument(fullName="no_print_header", shortName="noHeader", required=false, doc="Don't print the usual header on the table recalibration file. For debugging purposes only.")
|
||||
private boolean NO_PRINT_HEADER = false;
|
||||
@Argument(fullName="validate_old_recalibrator", shortName="validateOldRecalibrator", required=false, doc="Match the output of the old recalibrator exactly. For debugging purposes only.")
|
||||
private boolean VALIDATE_OLD_RECALIBRATOR = false;
|
||||
@Argument(fullName="use_slx_platform", shortName="useSLXPlatform", required=false, doc="Force the platform to be Illumina regardless of what it actually says. For debugging purposes only.")
|
||||
private boolean USE_SLX_PLATFORM = false;
|
||||
|
||||
|
||||
private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
|
||||
private ArrayList<Covariate> requestedCovariates; // A list to hold the covariate objects that were requested
|
||||
|
|
@ -130,7 +135,12 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
// Initialize the requested covariates by parsing the -cov argument
|
||||
requestedCovariates = new ArrayList<Covariate>();
|
||||
int estimatedCapacity = 1; // capacity is multiplicitive so this starts at one
|
||||
if( COVARIATES != null ) {
|
||||
if( VALIDATE_OLD_RECALIBRATOR ) {
|
||||
requestedCovariates.add( new ReadGroupCovariate() );
|
||||
requestedCovariates.add( new CycleCovariate() ); // unfortunately order is different here in order to match the old recalibrator exactly
|
||||
requestedCovariates.add( new QualityScoreCovariate() );
|
||||
requestedCovariates.add( new DinucCovariate() );
|
||||
} else if( COVARIATES != null ) {
|
||||
if(COVARIATES[0].equalsIgnoreCase("ALL")) { // the user wants ALL covariates to be used
|
||||
requestedCovariates.add( new ReadGroupCovariate() ); // first add the required covariates then add the rest by looping over all implementing classes that were found
|
||||
requestedCovariates.add( new QualityScoreCovariate() );
|
||||
|
|
@ -251,51 +261,61 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
// readGroupId = read.getReadGroup().getReadGroupId();
|
||||
// readGroupHashMap.put( read, readGroupId );
|
||||
//}
|
||||
|
||||
if( read.getMappingQuality() > 0 ) { // BUGBUG: turn this into a read filter after passing the old integration tests
|
||||
|
||||
offset = offsets.get(iii); // offset is zero based so quals[offset] and bases[offset] is correct
|
||||
offset = offsets.get(iii); // offset is zero based so quals[offset] and bases[offset] is correct
|
||||
|
||||
// skip first and last base because there is no dinuc, this is mainly done for speed so we don't have to check cases
|
||||
if( offset > 0 && offset < read.getReadLength() - 1 ) {
|
||||
// skip first and last base because there is no dinuc, this is mainly done for speed so we don't have to check cases
|
||||
if( offset > 0 && offset < read.getReadLength() - 1 ) {
|
||||
|
||||
quals = read.getBaseQualities();
|
||||
// Check if we need to use the original quality scores instead
|
||||
if ( USE_ORIGINAL_QUALS && read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) != null ) {
|
||||
Object obj = read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG);
|
||||
if ( obj instanceof String )
|
||||
quals = QualityUtils.fastqToPhred((String)obj);
|
||||
else {
|
||||
throw new RuntimeException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName()));
|
||||
}
|
||||
}
|
||||
|
||||
// skip if base quality is zero
|
||||
if( quals[offset] > 0 ) {
|
||||
bases = read.getReadBases(); // BUGBUG: DinucCovariate is relying on this method returning the same byte for bases 'a' and 'A'
|
||||
refBase = (byte)ref.getBase();
|
||||
prevBase = bases[offset-1];
|
||||
|
||||
// Get the complement base strand if we are a negative strand read
|
||||
if( read.getReadNegativeStrandFlag() ) {
|
||||
bases = BaseUtils.simpleComplement( bases ); // this is an expensive call
|
||||
refBase = (byte)BaseUtils.simpleComplement( ref.getBase() );
|
||||
prevBase = bases[offset+1];
|
||||
}
|
||||
|
||||
// skip if this base or the previous one was an 'N' or etc.
|
||||
if( BaseUtils.isRegularBase( (char)prevBase ) && BaseUtils.isRegularBase( (char)bases[offset] ) ) {
|
||||
|
||||
final SAMReadGroupRecord readGroup = read.getReadGroup();
|
||||
readGroupId = readGroup.getReadGroupId(); // this is an expensive call
|
||||
platform = readGroup.getPlatform(); // this is an expensive call
|
||||
|
||||
// SOLID bams insert the reference base into the read if the color space quality is zero, so skip over them
|
||||
colorSpaceQuals = null;
|
||||
if( platform.equalsIgnoreCase("SOLID") ) {
|
||||
colorSpaceQuals = QualityUtils.fastqToPhred((String)read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG));
|
||||
quals = read.getBaseQualities();
|
||||
// Check if we need to use the original quality scores instead
|
||||
if ( USE_ORIGINAL_QUALS && read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) != null ) {
|
||||
Object obj = read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG);
|
||||
if ( obj instanceof String )
|
||||
quals = QualityUtils.fastqToPhred((String)obj);
|
||||
else {
|
||||
throw new RuntimeException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName()));
|
||||
}
|
||||
if( colorSpaceQuals == null || colorSpaceQuals[offset] > 0 ) //BUGBUG: This isn't exactly correct yet
|
||||
{
|
||||
updateDataFromRead( read, offset, readGroupId, platform, quals, bases, refBase );
|
||||
}
|
||||
|
||||
// skip if base quality is zero
|
||||
if( quals[offset] > 0 ) {
|
||||
bases = read.getReadBases(); // BUGBUG: DinucCovariate is relying on this method returning the same byte for bases 'a' and 'A'
|
||||
refBase = (byte)ref.getBase();
|
||||
prevBase = bases[offset-1];
|
||||
|
||||
// Get the complement base strand if we are a negative strand read
|
||||
if( read.getReadNegativeStrandFlag() ) {
|
||||
bases = BaseUtils.simpleComplement( bases ); // this is an expensive call
|
||||
refBase = (byte)BaseUtils.simpleComplement( ref.getBase() );
|
||||
prevBase = bases[offset+1];
|
||||
}
|
||||
|
||||
// skip if this base or the previous one was an 'N' or etc.
|
||||
if( BaseUtils.isRegularBase( (char)prevBase ) && BaseUtils.isRegularBase( (char)bases[offset] ) ) {
|
||||
|
||||
final SAMReadGroupRecord readGroup = read.getReadGroup();
|
||||
readGroupId = readGroup.getReadGroupId();
|
||||
platform = readGroup.getPlatform();
|
||||
if( USE_SLX_PLATFORM ) {
|
||||
platform = "ILLUMINA";
|
||||
}
|
||||
|
||||
// SOLID bams insert the reference base into the read if the color space quality is zero, so skip over them
|
||||
colorSpaceQuals = null;
|
||||
if( platform.equalsIgnoreCase("SOLID") ) {
|
||||
colorSpaceQuals = QualityUtils.fastqToPhred((String)read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG));
|
||||
}
|
||||
if( colorSpaceQuals == null || colorSpaceQuals[offset] > 0 ) //BUGBUG: This isn't exactly correct yet
|
||||
{
|
||||
updateDataFromRead( read, offset, readGroupId, platform, quals, bases, refBase );
|
||||
}
|
||||
} else {
|
||||
if( VALIDATE_OLD_RECALIBRATOR ) {
|
||||
countedBases++; // replicating a small bug in the old recalibrator
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -320,6 +340,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
* @param read The read
|
||||
* @param offset The offset in the read for this locus
|
||||
* @param readGroup The read group the read is in
|
||||
* @param platform The String that has the platform this read came from: Illumina, 454, or solid
|
||||
* @param quals List of base quality scores
|
||||
* @param bases The bases which make up the read
|
||||
* @param refBase The reference base at this locus
|
||||
|
|
@ -338,7 +359,11 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
RecalDatum datum = dataManager.data.get( key );
|
||||
if( datum == null ) { // key doesn't exist yet in the map so make a new bucket and add it
|
||||
datum = new RecalDatum(); // initialized with zeros, will be incremented at end of method
|
||||
dataManager.data.put( key, datum );
|
||||
if( VALIDATE_OLD_RECALIBRATOR ) {
|
||||
dataManager.data.myPut( key, datum );
|
||||
} else {
|
||||
dataManager.data.put( key, datum );
|
||||
}
|
||||
}
|
||||
|
||||
// Need the bases to determine whether or not we have a mismatch
|
||||
|
|
@ -396,26 +421,46 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
*/
|
||||
private void outputToCSV( final PrintStream recalTableStream ) {
|
||||
|
||||
if( !NO_PRINT_HEADER ) {
|
||||
recalTableStream.printf("# Counted Sites %d%n", countedSites);
|
||||
recalTableStream.printf("# Counted Bases %d%n", countedBases);
|
||||
recalTableStream.printf("# Skipped Sites %d%n", skippedSites);
|
||||
recalTableStream.printf("# Fraction Skipped 1 / %.0f bp%n", (double)countedSites / skippedSites);
|
||||
for( Covariate cov : requestedCovariates ) {
|
||||
// The "@!" is a code for TableRecalibrationWalker to recognize this line as a Covariate class name
|
||||
recalTableStream.println( "@!" + cov.getClass().getSimpleName() );
|
||||
if( VALIDATE_OLD_RECALIBRATOR ) {
|
||||
recalTableStream.printf("# collapsed_pos false%n");
|
||||
recalTableStream.printf("# collapsed_dinuc false%n");
|
||||
recalTableStream.printf("# counted_sites %d%n", countedSites);
|
||||
recalTableStream.printf("# counted_bases %d%n", countedBases);
|
||||
recalTableStream.printf("# skipped_sites %d%n", skippedSites);
|
||||
recalTableStream.printf("# fraction_skipped 1 / %.0f bp%n", (double)countedSites / skippedSites);
|
||||
recalTableStream.printf("rg,pos,Qrep,dn,nBases,nMismatches,Qemp%n");
|
||||
// For each entry in the data hashmap
|
||||
for( Pair<List<? extends Comparable>,RecalDatum> entry : dataManager.data.entrySetSorted4() ) {
|
||||
// For each Covariate in the key
|
||||
for( Comparable comp : entry.first ) {
|
||||
// Output the Covariate's value
|
||||
recalTableStream.print( comp + "," );
|
||||
}
|
||||
// Output the RecalDatum entry
|
||||
recalTableStream.println( entry.second.outputToCSV() );
|
||||
}
|
||||
}
|
||||
// For each entry in the data hashmap
|
||||
for( Map.Entry<List<? extends Comparable>, RecalDatum> entry : dataManager.data.entrySet() ) {
|
||||
// For each Covariate in the key
|
||||
for( Comparable comp : entry.getKey() ) {
|
||||
// Output the Covariate's value
|
||||
if( NO_PRINT_HEADER && comp instanceof String ) { continue; } // BUGBUG
|
||||
recalTableStream.print( comp + "," );
|
||||
} else {
|
||||
if( !NO_PRINT_HEADER ) {
|
||||
recalTableStream.printf("# Counted Sites %d%n", countedSites);
|
||||
recalTableStream.printf("# Counted Bases %d%n", countedBases);
|
||||
recalTableStream.printf("# Skipped Sites %d%n", skippedSites);
|
||||
recalTableStream.printf("# Fraction Skipped 1 / %.0f bp%n", (double)countedSites / skippedSites);
|
||||
for( Covariate cov : requestedCovariates ) {
|
||||
// The "@!" is a code for TableRecalibrationWalker to recognize this line as a Covariate class name
|
||||
recalTableStream.println( "@!" + cov.getClass().getSimpleName() );
|
||||
}
|
||||
}
|
||||
// For each entry in the data hashmap
|
||||
for( Map.Entry<List<? extends Comparable>, RecalDatum> entry : dataManager.data.entrySet() ) {
|
||||
// For each Covariate in the key
|
||||
for( Comparable comp : entry.getKey() ) {
|
||||
// Output the Covariate's value
|
||||
if( NO_PRINT_HEADER && comp instanceof String ) { continue; } // BUGBUG
|
||||
recalTableStream.print( comp + "," );
|
||||
}
|
||||
// Output the RecalDatum entry
|
||||
recalTableStream.println( entry.getValue().outputToCSV() );
|
||||
}
|
||||
// Output the RecalDatum entry
|
||||
recalTableStream.println( entry.getValue().outputToCSV() );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,8 +2,6 @@ package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
|||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
|
||||
/*
|
||||
|
|
@ -60,10 +58,12 @@ public class DinucCovariate implements Covariate {
|
|||
final byte[] quals, final byte[] bases) {
|
||||
|
||||
byte base = bases[offset];
|
||||
byte prevBase = bases[offset - 1];
|
||||
byte prevBase;
|
||||
// If this is a negative strand read then we need to reverse the direction for our previous base
|
||||
if( read.getReadNegativeStrandFlag() ) {
|
||||
prevBase = bases[offset + 1];
|
||||
} else {
|
||||
prevBase = bases[offset - 1];
|
||||
}
|
||||
//char[] charArray = {(char)prevBase, (char)base};
|
||||
//return new String( charArray ); // This is an expensive call
|
||||
|
|
|
|||
|
|
@ -1,5 +1,8 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
||||
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/*
|
||||
|
|
@ -33,18 +36,84 @@ import java.util.*;
|
|||
* Date: Oct 30, 2009
|
||||
*
|
||||
* A HashMap that maps a list of comparables to any object <T>.
|
||||
* There is functionality for the mappings to be given back to you in sorted order.
|
||||
*/
|
||||
|
||||
public class NHashMap<T> extends HashMap<List<? extends Comparable>, T> {
|
||||
|
||||
private static final long serialVersionUID = 1L; //BUGBUG: what should I do here?, added by Eclipse
|
||||
private ArrayList<ArrayList<Comparable>> keyLists;
|
||||
|
||||
public NHashMap() {
|
||||
super();
|
||||
keyLists = null;
|
||||
}
|
||||
|
||||
public NHashMap( int initialCapacity, float loadingFactor ) {
|
||||
super( initialCapacity, loadingFactor );
|
||||
keyLists = null;
|
||||
}
|
||||
|
||||
|
||||
// This method is here only to help facilitate direct comparison of old and refactored recalibrator.
|
||||
// The old recalibrator prints out its mappings in a sorted order but the refactored recalibrator doesn't need to.
|
||||
public T myPut(List<? extends Comparable> key, T value) {
|
||||
|
||||
if( keyLists == null ) {
|
||||
keyLists = new ArrayList<ArrayList<Comparable>>();
|
||||
for( Comparable comp : key ) {
|
||||
keyLists.add( new ArrayList<Comparable>() );
|
||||
}
|
||||
}
|
||||
|
||||
ArrayList<Comparable> thisList;
|
||||
for( int iii = 0; iii < key.size(); iii++ ) {
|
||||
thisList = keyLists.get( iii );
|
||||
if( thisList == null ) {
|
||||
thisList = new ArrayList<Comparable>();
|
||||
}
|
||||
if( !thisList.contains( key.get( iii ) ) ) {
|
||||
thisList.add( key.get(iii ) );
|
||||
}
|
||||
}
|
||||
return super.put( key, value );
|
||||
}
|
||||
|
||||
// This method is very ugly but is here only to help facilitate direct comparison of old and refactored recalibrator.
|
||||
// The old recalibrator prints out its mappings in a sorted order but the refactored recalibrator doesn't need to.
|
||||
@SuppressWarnings(value = "unchecked")
|
||||
public ArrayList<Pair<List<? extends Comparable>, T>> entrySetSorted4() {
|
||||
|
||||
ArrayList<Pair<List<? extends Comparable>, T>> theSet = new ArrayList<Pair<List<? extends Comparable>, T>>();
|
||||
|
||||
for( ArrayList<Comparable> list : keyLists ) {
|
||||
Collections.sort(list);
|
||||
}
|
||||
|
||||
if( keyLists.size() != 4 ) {
|
||||
throw new StingException("Are you sure you want to be calling this ugly method? NHashMap.entrySetSorted4()");
|
||||
}
|
||||
|
||||
ArrayList<Comparable> newKey = null;
|
||||
for( Comparable c0 : keyLists.get(0) ) {
|
||||
for( Comparable c1 : keyLists.get(1) ) {
|
||||
for( Comparable c2 : keyLists.get(2) ) {
|
||||
for( Comparable c3 : keyLists.get(3) ) {
|
||||
newKey = new ArrayList<Comparable>();
|
||||
newKey.add(c0);
|
||||
newKey.add(c1);
|
||||
newKey.add(c2);
|
||||
newKey.add(c3);
|
||||
T value = this.get( newKey );
|
||||
if( value!= null ) {
|
||||
theSet.add(new Pair<List<? extends Comparable>,T>( newKey, value ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return theSet;
|
||||
}
|
||||
|
||||
public static <T> List<T> makeList(T... args) {
|
||||
|
|
|
|||
|
|
@ -74,6 +74,10 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
private int WINDOW_SIZE = 3;
|
||||
@Argument(fullName="smoothing", shortName="sm", required = false, doc="Number of imaginary counts to add to each bin in order to smooth out bins with few data points")
|
||||
private int SMOOTHING = 1;
|
||||
@Argument(fullName="validate_old_recalibrator", shortName="validateOldRecalibrator", required=false, doc="Match the output of the old recalibrator exactly. For debugging purposes only.")
|
||||
private boolean VALIDATE_OLD_RECALIBRATOR = false;
|
||||
@Argument(fullName="use_slx_platform", shortName="useSLXPlatform", required=false, doc="Force the platform to be Illumina regardless of what it actually says. For debugging purposes only.")
|
||||
private boolean USE_SLX_PLATFORM = false;
|
||||
|
||||
//public enum RecalibrationMode {
|
||||
// COMBINATORIAL,
|
||||
|
|
@ -89,6 +93,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
private ArrayList<Covariate> requestedCovariates;
|
||||
|
||||
private static Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
|
||||
private static Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
|
||||
private static Pattern COVARIATE_PATTERN = Pattern.compile("^@!.*");
|
||||
|
||||
|
||||
|
|
@ -133,7 +138,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
try {
|
||||
for ( String line : new xReadLines(new File( RECAL_FILE )) ) {
|
||||
lineNumber++;
|
||||
if( COMMENT_PATTERN.matcher(line).matches() ) {
|
||||
if( COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches()) {
|
||||
; // skip over the comment lines, (which start with '#')
|
||||
}
|
||||
else if( COVARIATE_PATTERN.matcher(line).matches() ) { // the line string is either specifying a covariate or is giving csv data
|
||||
|
|
@ -168,6 +173,12 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
|
||||
} else { // found some data
|
||||
if( !foundAllCovariates ) {
|
||||
if( VALIDATE_OLD_RECALIBRATOR ) {
|
||||
requestedCovariates.add( new ReadGroupCovariate() );
|
||||
requestedCovariates.add( new QualityScoreCovariate() );
|
||||
requestedCovariates.add( new CycleCovariate() );
|
||||
requestedCovariates.add( new DinucCovariate() );
|
||||
}
|
||||
foundAllCovariates = true;
|
||||
if(estimatedCapacity > 300 * 40 * 200 * 16) { estimatedCapacity = 300 * 40 * 200 * 16; } // Don't want to crash with out of heap space exception
|
||||
final boolean createCollapsedTables = true;
|
||||
|
|
@ -208,7 +219,17 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
int iii;
|
||||
for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
|
||||
cov = requestedCovariates.get( iii );
|
||||
key.add( cov.getValue( vals[iii] ) );
|
||||
if( VALIDATE_OLD_RECALIBRATOR ) {
|
||||
if( iii == 1 ) { // order is different in the old recalibrator
|
||||
key.add( cov.getValue( vals[2] ) );
|
||||
} else if ( iii == 2 ) {
|
||||
key.add( cov.getValue( vals[1] ) );
|
||||
} else {
|
||||
key.add( cov.getValue( vals[iii] ) );
|
||||
}
|
||||
} else {
|
||||
key.add( cov.getValue( vals[iii] ) );
|
||||
}
|
||||
}
|
||||
RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ) );
|
||||
dataManager.addToAllTables( key, datum );
|
||||
|
|
@ -231,10 +252,6 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
|
||||
// WARNING: refBases is always null because this walker doesn't have @Requires({DataSource.REFERENCE_BASES})
|
||||
// This is done in order to speed up the code
|
||||
|
||||
if( read.getMappingQuality() <= 0 ) {
|
||||
return read; // early return here, unmapped reads and mapping quality zero reads should be left alone
|
||||
}
|
||||
|
||||
byte[] originalQuals = read.getBaseQualities();
|
||||
// Check if we need to use the original quality scores instead
|
||||
|
|
@ -251,33 +268,30 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
// These calls are expensive so only do them once for each read
|
||||
String readGroup = read.getReadGroup().getReadGroupId();
|
||||
String platform = read.getReadGroup().getPlatform();
|
||||
if( USE_SLX_PLATFORM ) {
|
||||
platform = "ILLUMINA";
|
||||
}
|
||||
|
||||
byte[] bases = read.getReadBases();
|
||||
int startPos = 1;
|
||||
int stopPos = read.getReadLength();
|
||||
|
||||
if( read.getReadNegativeStrandFlag() ) {
|
||||
bases = BaseUtils.simpleComplement( bases );
|
||||
startPos = 0;
|
||||
stopPos = read.getReadLength() - 1;
|
||||
}
|
||||
|
||||
|
||||
// For each base in the read
|
||||
for( int iii = 1; iii < read.getReadLength() - 1; iii++ ) { // skip first and last bases because there is no dinuc
|
||||
for( int iii = startPos; iii < stopPos; iii++ ) { // skip first or last base because there is no dinuc depending on the direction of the read
|
||||
List<Comparable> key = new ArrayList<Comparable>();
|
||||
// Get the covariate values which make up the key
|
||||
for( Covariate covariate : requestedCovariates ) {
|
||||
key.add( covariate.getValue( read, iii, readGroup, platform, originalQuals, bases ) ); // offset is zero based so passing iii is correct here
|
||||
}
|
||||
|
||||
recalQuals[iii] = performSequentialQualityCalculation( key );
|
||||
|
||||
//if( MODE_STRING.equalsIgnoreCase("COMBINATORIAL") ) { // BUGBUG: This isn't supported. No need to keep the full data hashmap around so it was removed for major speed up
|
||||
// //RecalDatum datum = dataManager.data.get( key );
|
||||
// //if( datum != null ) { // if we have data for this combination of covariates then recalibrate the quality score otherwise do nothing
|
||||
// // recalQuals[iii] = datum.empiricalQualByte( SMOOTHING );
|
||||
// //}
|
||||
// throw new StingException("The Combinatorial mode isn't supported.");
|
||||
//} else if( MODE_STRING.equalsIgnoreCase("SEQUENTIAL") ) {
|
||||
//
|
||||
//} else {
|
||||
// throw new StingException( "Specified RecalibrationMode is not supported: " + MODE_STRING );
|
||||
//}
|
||||
}
|
||||
|
||||
preserveQScores( originalQuals, recalQuals ); // overwrite the work done if original quality score is too low
|
||||
|
|
@ -340,11 +354,17 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
// The shift in quality due to each covariate by itself in turn
|
||||
double deltaQCovariates = 0.0;
|
||||
Double deltaQCovariateEmpirical;
|
||||
double deltaQPos = 0.0;
|
||||
double deltaQDinuc = 0.0;
|
||||
for( int iii = 2; iii < key.size(); iii++ ) {
|
||||
newKey.add( key.get(iii) ); // the given covariate
|
||||
deltaQCovariateEmpirical = dataManager.getCollapsedDoubleTable(iii).get( newKey );
|
||||
if( deltaQCovariateEmpirical != null ) {
|
||||
deltaQCovariates += ( deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported) );
|
||||
if( VALIDATE_OLD_RECALIBRATOR ) {
|
||||
if(iii==2) { deltaQPos = deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported); } // BUGBUG: only here to validate against the old recalibrator
|
||||
if(iii==3) { deltaQDinuc = deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported); }
|
||||
}
|
||||
}
|
||||
newKey.remove( 2 ); // this new covariate is always added in at position 2 in the newKey list
|
||||
}
|
||||
|
|
|
|||
|
|
@ -82,6 +82,7 @@ public class CovariateCounterTest extends BaseTest {
|
|||
read3 = ArtificialSAMUtils.createArtificialRead(header,"read3",1,1, bases1, quals3);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testCovariateCounterSetup() {
|
||||
Assert.assertEquals("Number of read groups is wrong", c.getNReadGroups(), 2);
|
||||
|
|
@ -179,4 +180,5 @@ public class CovariateCounterTest extends BaseTest {
|
|||
|
||||
c.updateDataFromRead(readGroup1, read, 0, (char)bases[0], false);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -53,6 +53,7 @@ public class RecalDataTest extends BaseTest {
|
|||
starDatum.B = starDatum.N = 1;
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testBasic() {
|
||||
logger.warn("Executing testIsBetween");
|
||||
|
|
@ -178,4 +179,5 @@ public class RecalDataTest extends BaseTest {
|
|||
}
|
||||
Assert.assertEquals(indices.size(), 17);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.io.File;
|
||||
|
||||
public class RefactoredRecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||
static HashMap<String, String> paramsFiles = new HashMap<String, String>();
|
||||
|
||||
@Test
|
||||
public void testCountCovariatesR() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.SLX.SRP000031.2009_06.chr1.10_20mb.bam", "7be0b7a624d22187e712131f12aae546" );
|
||||
//e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "" );
|
||||
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "dc1a1b6e99f4a47525cc1dce7b6eb1dc" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
String md5 = entry.getValue();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R /broad/1KG/reference/human_b36_both.fasta" +
|
||||
" --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" +
|
||||
" -T CountCovariatesRefactored" +
|
||||
" -I " + bam +
|
||||
" -L 1:10,000,000-11,000,000" +
|
||||
" --validate_old_recalibrator" +
|
||||
" --use_slx_platform" +
|
||||
" -recalFile %s",
|
||||
1, // just one output file
|
||||
Arrays.asList(md5));
|
||||
List<File> result = executeTest("testCountCovariatesR", spec).getFirst();
|
||||
paramsFiles.put(bam, result.get(0).getAbsolutePath());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTableRecalibratorR() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.SLX.SRP000031.2009_06.chr1.10_20mb.bam", "3ace4b9b8495429cc32e7008145f4996" );
|
||||
//e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "" );
|
||||
//e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
String md5 = entry.getValue();
|
||||
String paramsFile = paramsFiles.get(bam);
|
||||
System.out.printf("PARAMS FOR %s is %s%n", bam, paramsFile);
|
||||
if ( paramsFile != null ) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-R /broad/1KG/reference/human_b36_both.fasta" +
|
||||
" --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" +
|
||||
" -T TableRecalibrationRefactored" +
|
||||
" -I " + bam +
|
||||
" -L 1:10,000,000-20,000,000" +
|
||||
" --validate_old_recalibrator" +
|
||||
" --use_slx_platform" +
|
||||
" -outputBam %s" +
|
||||
" -recalFile " + paramsFile,
|
||||
1, // just one output file
|
||||
Arrays.asList(md5));
|
||||
executeTest("testTableRecalibratorR", spec);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue