Added the old recalibrator integration tests to the refactored recalibrator sitting in playground.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2096 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2009-11-19 22:43:28 +00:00
parent a95302fe98
commit 22aaf8c5e0
9 changed files with 311 additions and 84 deletions

View File

@ -0,0 +1,15 @@
package org.broadinstitute.sting.gatk.filters;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMRecord;
/**
 * Read filter that removes any read which does not carry an original base
 * quality score ("OQ") attribute. Used by the recalibrator so that only reads
 * whose pre-recalibration qualities are recoverable are processed.
 *
 * Created by IntelliJ IDEA.
 * User: rpoplin
 * Date: Nov 19, 2009
 */
public class NoOriginalQualityScoresFilter implements SamRecordFilter {
    /**
     * @param rec the read to test
     * @return true (i.e. filter the read out) when the read has no "OQ" attribute
     */
    public boolean filterOut(SAMRecord rec) {
        return (rec.getAttribute("OQ") == null);
    }
}

View File

@ -514,6 +514,9 @@ class SerialRecalMapping implements RecalMapping {
double newQual = qual + globalDeltaQ + deltaQual + deltaQPos + deltaQDinuc;
newQualByte = QualityUtils.boundQual((int)Math.round(newQual), QualityUtils.MAX_REASONABLE_Q_SCORE);
//System.out.println(String.format("%s %c%c %d %d => %d + %.2f + %.2f + %.2f + %.2f = %d",
// readGroup, prevBase, base, cycle, qual, qual, globalDeltaQ, deltaQual, deltaQPos, deltaQDinuc, newQualByte));
if ( newQualByte <= 0 && newQualByte >= QualityUtils.MAX_REASONABLE_Q_SCORE )
throw new RuntimeException(String.format("Illegal base quality score calculated: %s %c%c %d %d => %d + %.2f + %.2f + %.2f = %d",
readGroup, prevBase, base, cycle, qual, qual, globalDeltaQ, deltaQPos, deltaQDinuc, newQualByte));

View File

@ -64,7 +64,7 @@ import net.sf.samtools.SAMReadGroupRecord;
@By( DataSource.READS ) // Only look at covered loci, not every loci of the reference file
@WalkerName( "CountCovariatesRefactored" )
@ReadFilters( {ZeroMappingQualityReadFilter.class} ) // Filter out all reads with zero mapping quality
//@ReadFilters( {ZeroMappingQualityReadFilter.class} ) // Filter out all reads with zero mapping quality // BUGBUG taken out to match old integration tests
@Requires( {DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES} ) // This walker requires both -I input.bam and -R reference.fasta
public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
@ -80,6 +80,11 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
private String RECAL_FILE = "output.recal_data.csv";
@Argument(fullName="no_print_header", shortName="noHeader", required=false, doc="Don't print the usual header on the table recalibration file. For debugging purposes only.")
private boolean NO_PRINT_HEADER = false;
@Argument(fullName="validate_old_recalibrator", shortName="validateOldRecalibrator", required=false, doc="Match the output of the old recalibrator exactly. For debugging purposes only.")
private boolean VALIDATE_OLD_RECALIBRATOR = false;
@Argument(fullName="use_slx_platform", shortName="useSLXPlatform", required=false, doc="Force the platform to be Illumina regardless of what it actually says. For debugging purposes only.")
private boolean USE_SLX_PLATFORM = false;
private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
private ArrayList<Covariate> requestedCovariates; // A list to hold the covariate objects that were requested
@ -130,7 +135,12 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
// Initialize the requested covariates by parsing the -cov argument
requestedCovariates = new ArrayList<Covariate>();
int estimatedCapacity = 1; // capacity is multiplicitive so this starts at one
if( COVARIATES != null ) {
if( VALIDATE_OLD_RECALIBRATOR ) {
requestedCovariates.add( new ReadGroupCovariate() );
requestedCovariates.add( new CycleCovariate() ); // unfortunately order is different here in order to match the old recalibrator exactly
requestedCovariates.add( new QualityScoreCovariate() );
requestedCovariates.add( new DinucCovariate() );
} else if( COVARIATES != null ) {
if(COVARIATES[0].equalsIgnoreCase("ALL")) { // the user wants ALL covariates to be used
requestedCovariates.add( new ReadGroupCovariate() ); // first add the required covariates then add the rest by looping over all implementing classes that were found
requestedCovariates.add( new QualityScoreCovariate() );
@ -251,51 +261,61 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
// readGroupId = read.getReadGroup().getReadGroupId();
// readGroupHashMap.put( read, readGroupId );
//}
if( read.getMappingQuality() > 0 ) { // BUGBUG: turn this into a read filter after passing the old integration tests
offset = offsets.get(iii); // offset is zero based so quals[offset] and bases[offset] is correct
offset = offsets.get(iii); // offset is zero based so quals[offset] and bases[offset] is correct
// skip first and last base because there is no dinuc, this is mainly done for speed so we don't have to check cases
if( offset > 0 && offset < read.getReadLength() - 1 ) {
// skip first and last base because there is no dinuc, this is mainly done for speed so we don't have to check cases
if( offset > 0 && offset < read.getReadLength() - 1 ) {
quals = read.getBaseQualities();
// Check if we need to use the original quality scores instead
if ( USE_ORIGINAL_QUALS && read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) != null ) {
Object obj = read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG);
if ( obj instanceof String )
quals = QualityUtils.fastqToPhred((String)obj);
else {
throw new RuntimeException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName()));
}
}
// skip if base quality is zero
if( quals[offset] > 0 ) {
bases = read.getReadBases(); // BUGBUG: DinucCovariate is relying on this method returning the same byte for bases 'a' and 'A'
refBase = (byte)ref.getBase();
prevBase = bases[offset-1];
// Get the complement base strand if we are a negative strand read
if( read.getReadNegativeStrandFlag() ) {
bases = BaseUtils.simpleComplement( bases ); // this is an expensive call
refBase = (byte)BaseUtils.simpleComplement( ref.getBase() );
prevBase = bases[offset+1];
}
// skip if this base or the previous one was an 'N' or etc.
if( BaseUtils.isRegularBase( (char)prevBase ) && BaseUtils.isRegularBase( (char)bases[offset] ) ) {
final SAMReadGroupRecord readGroup = read.getReadGroup();
readGroupId = readGroup.getReadGroupId(); // this is an expensive call
platform = readGroup.getPlatform(); // this is an expensive call
// SOLID bams insert the reference base into the read if the color space quality is zero, so skip over them
colorSpaceQuals = null;
if( platform.equalsIgnoreCase("SOLID") ) {
colorSpaceQuals = QualityUtils.fastqToPhred((String)read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG));
quals = read.getBaseQualities();
// Check if we need to use the original quality scores instead
if ( USE_ORIGINAL_QUALS && read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG) != null ) {
Object obj = read.getAttribute(RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG);
if ( obj instanceof String )
quals = QualityUtils.fastqToPhred((String)obj);
else {
throw new RuntimeException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName()));
}
if( colorSpaceQuals == null || colorSpaceQuals[offset] > 0 ) //BUGBUG: This isn't exactly correct yet
{
updateDataFromRead( read, offset, readGroupId, platform, quals, bases, refBase );
}
// skip if base quality is zero
if( quals[offset] > 0 ) {
bases = read.getReadBases(); // BUGBUG: DinucCovariate is relying on this method returning the same byte for bases 'a' and 'A'
refBase = (byte)ref.getBase();
prevBase = bases[offset-1];
// Get the complement base strand if we are a negative strand read
if( read.getReadNegativeStrandFlag() ) {
bases = BaseUtils.simpleComplement( bases ); // this is an expensive call
refBase = (byte)BaseUtils.simpleComplement( ref.getBase() );
prevBase = bases[offset+1];
}
// skip if this base or the previous one was an 'N' or etc.
if( BaseUtils.isRegularBase( (char)prevBase ) && BaseUtils.isRegularBase( (char)bases[offset] ) ) {
final SAMReadGroupRecord readGroup = read.getReadGroup();
readGroupId = readGroup.getReadGroupId();
platform = readGroup.getPlatform();
if( USE_SLX_PLATFORM ) {
platform = "ILLUMINA";
}
// SOLID bams insert the reference base into the read if the color space quality is zero, so skip over them
colorSpaceQuals = null;
if( platform.equalsIgnoreCase("SOLID") ) {
colorSpaceQuals = QualityUtils.fastqToPhred((String)read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG));
}
if( colorSpaceQuals == null || colorSpaceQuals[offset] > 0 ) //BUGBUG: This isn't exactly correct yet
{
updateDataFromRead( read, offset, readGroupId, platform, quals, bases, refBase );
}
} else {
if( VALIDATE_OLD_RECALIBRATOR ) {
countedBases++; // replicating a small bug in the old recalibrator
}
}
}
}
@ -320,6 +340,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
* @param read The read
* @param offset The offset in the read for this locus
* @param readGroup The read group the read is in
* @param platform The String that has the platform this read came from: Illumina, 454, or solid
* @param quals List of base quality scores
* @param bases The bases which make up the read
* @param refBase The reference base at this locus
@ -338,7 +359,11 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
RecalDatum datum = dataManager.data.get( key );
if( datum == null ) { // key doesn't exist yet in the map so make a new bucket and add it
datum = new RecalDatum(); // initialized with zeros, will be incremented at end of method
dataManager.data.put( key, datum );
if( VALIDATE_OLD_RECALIBRATOR ) {
dataManager.data.myPut( key, datum );
} else {
dataManager.data.put( key, datum );
}
}
// Need the bases to determine whether or not we have a mismatch
@ -396,26 +421,46 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
*/
private void outputToCSV( final PrintStream recalTableStream ) {
if( !NO_PRINT_HEADER ) {
recalTableStream.printf("# Counted Sites %d%n", countedSites);
recalTableStream.printf("# Counted Bases %d%n", countedBases);
recalTableStream.printf("# Skipped Sites %d%n", skippedSites);
recalTableStream.printf("# Fraction Skipped 1 / %.0f bp%n", (double)countedSites / skippedSites);
for( Covariate cov : requestedCovariates ) {
// The "@!" is a code for TableRecalibrationWalker to recognize this line as a Covariate class name
recalTableStream.println( "@!" + cov.getClass().getSimpleName() );
if( VALIDATE_OLD_RECALIBRATOR ) {
recalTableStream.printf("# collapsed_pos false%n");
recalTableStream.printf("# collapsed_dinuc false%n");
recalTableStream.printf("# counted_sites %d%n", countedSites);
recalTableStream.printf("# counted_bases %d%n", countedBases);
recalTableStream.printf("# skipped_sites %d%n", skippedSites);
recalTableStream.printf("# fraction_skipped 1 / %.0f bp%n", (double)countedSites / skippedSites);
recalTableStream.printf("rg,pos,Qrep,dn,nBases,nMismatches,Qemp%n");
// For each entry in the data hashmap
for( Pair<List<? extends Comparable>,RecalDatum> entry : dataManager.data.entrySetSorted4() ) {
// For each Covariate in the key
for( Comparable comp : entry.first ) {
// Output the Covariate's value
recalTableStream.print( comp + "," );
}
// Output the RecalDatum entry
recalTableStream.println( entry.second.outputToCSV() );
}
}
// For each entry in the data hashmap
for( Map.Entry<List<? extends Comparable>, RecalDatum> entry : dataManager.data.entrySet() ) {
// For each Covariate in the key
for( Comparable comp : entry.getKey() ) {
// Output the Covariate's value
if( NO_PRINT_HEADER && comp instanceof String ) { continue; } // BUGBUG
recalTableStream.print( comp + "," );
} else {
if( !NO_PRINT_HEADER ) {
recalTableStream.printf("# Counted Sites %d%n", countedSites);
recalTableStream.printf("# Counted Bases %d%n", countedBases);
recalTableStream.printf("# Skipped Sites %d%n", skippedSites);
recalTableStream.printf("# Fraction Skipped 1 / %.0f bp%n", (double)countedSites / skippedSites);
for( Covariate cov : requestedCovariates ) {
// The "@!" is a code for TableRecalibrationWalker to recognize this line as a Covariate class name
recalTableStream.println( "@!" + cov.getClass().getSimpleName() );
}
}
// For each entry in the data hashmap
for( Map.Entry<List<? extends Comparable>, RecalDatum> entry : dataManager.data.entrySet() ) {
// For each Covariate in the key
for( Comparable comp : entry.getKey() ) {
// Output the Covariate's value
if( NO_PRINT_HEADER && comp instanceof String ) { continue; } // BUGBUG
recalTableStream.print( comp + "," );
}
// Output the RecalDatum entry
recalTableStream.println( entry.getValue().outputToCSV() );
}
// Output the RecalDatum entry
recalTableStream.println( entry.getValue().outputToCSV() );
}
}
}

View File

@ -2,8 +2,6 @@ package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
import net.sf.samtools.SAMRecord;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
/*
@ -60,10 +58,12 @@ public class DinucCovariate implements Covariate {
final byte[] quals, final byte[] bases) {
byte base = bases[offset];
byte prevBase = bases[offset - 1];
byte prevBase;
// If this is a negative strand read then we need to reverse the direction for our previous base
if( read.getReadNegativeStrandFlag() ) {
prevBase = bases[offset + 1];
} else {
prevBase = bases[offset - 1];
}
//char[] charArray = {(char)prevBase, (char)base};
//return new String( charArray ); // This is an expensive call

View File

@ -1,5 +1,8 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.StingException;
import java.util.*;
/*
@ -33,18 +36,84 @@ import java.util.*;
* Date: Oct 30, 2009
*
* A HashMap that maps a list of comparables to any object <T>.
* There is functionality for the mappings to be given back to you in sorted order.
*/
public class NHashMap<T> extends HashMap<List<? extends Comparable>, T> {
private static final long serialVersionUID = 1L; //BUGBUG: what should I do here?, added by Eclipse
private ArrayList<ArrayList<Comparable>> keyLists;
/**
 * Create an empty NHashMap with the default capacity. The per-position key
 * tracking used for sorted output stays disabled until myPut() is first called.
 */
public NHashMap() {
    super();
    keyLists = null;
}
/**
 * Create an empty NHashMap with the given initial capacity and load factor.
 * The per-position key tracking used for sorted output stays disabled until
 * myPut() is first called.
 *
 * @param initialCapacity initial capacity handed to the underlying HashMap
 * @param loadingFactor   load factor handed to the underlying HashMap
 */
public NHashMap( int initialCapacity, float loadingFactor ) {
    super( initialCapacity, loadingFactor );
    keyLists = null;
}
// This method is here only to help facilitate direct comparison of old and refactored recalibrator.
// The old recalibrator prints out its mappings in a sorted order but the refactored recalibrator doesn't need to.
/**
 * Put that additionally records every distinct covariate value seen at each key
 * position, so the mappings can later be emitted in sorted order by entrySetSorted4().
 *
 * @param key   the list of covariate values making up the key
 * @param value the value to associate with the key
 * @return the previous value associated with this key, or null if there was none
 */
public T myPut(List<? extends Comparable> key, T value) {
    // Lazily allocate one value-tracking list per key position on first use
    if( keyLists == null ) {
        keyLists = new ArrayList<ArrayList<Comparable>>();
        for( int iii = 0; iii < key.size(); iii++ ) {
            keyLists.add( new ArrayList<Comparable>() );
        }
    }
    ArrayList<Comparable> thisList;
    for( int iii = 0; iii < key.size(); iii++ ) {
        thisList = keyLists.get( iii );
        if( thisList == null ) {
            // Defensive repair: previously a fresh list created here was never stored
            // back into keyLists, so the recorded value was silently discarded.
            thisList = new ArrayList<Comparable>();
            keyLists.set( iii, thisList );
        }
        // Record each covariate value only the first time it is seen at this position
        if( !thisList.contains( key.get( iii ) ) ) {
            thisList.add( key.get( iii ) );
        }
    }
    return super.put( key, value );
}
// This method is very ugly but is here only to help facilitate direct comparison of old and refactored recalibrator.
// The old recalibrator prints out its mappings in a sorted order but the refactored recalibrator doesn't need to.
/**
 * Return every (key, value) pair in this map, ordered by walking the cross
 * product of the sorted per-position value lists recorded by myPut().
 * Hard-wired to exactly four key positions.
 */
@SuppressWarnings(value = "unchecked")
public ArrayList<Pair<List<? extends Comparable>, T>> entrySetSorted4() {
    final ArrayList<Pair<List<? extends Comparable>, T>> sortedEntries =
            new ArrayList<Pair<List<? extends Comparable>, T>>();
    // Sort the recorded values at every key position so iteration below is ordered
    for( final ArrayList<Comparable> valuesAtPosition : keyLists ) {
        Collections.sort( valuesAtPosition );
    }
    if( keyLists.size() != 4 ) {
        throw new StingException("Are you sure you want to be calling this ugly method? NHashMap.entrySetSorted4()");
    }
    // Enumerate the full cross product of the four sorted lists; keep only the
    // combinations that actually have a mapping in this map.
    for( final Comparable first : keyLists.get(0) ) {
        for( final Comparable second : keyLists.get(1) ) {
            for( final Comparable third : keyLists.get(2) ) {
                for( final Comparable fourth : keyLists.get(3) ) {
                    final ArrayList<Comparable> candidateKey = new ArrayList<Comparable>();
                    candidateKey.add( first );
                    candidateKey.add( second );
                    candidateKey.add( third );
                    candidateKey.add( fourth );
                    final T mapped = this.get( candidateKey );
                    if( mapped != null ) {
                        sortedEntries.add( new Pair<List<? extends Comparable>, T>( candidateKey, mapped ) );
                    }
                }
            }
        }
    }
    return sortedEntries;
}
public static <T> List<T> makeList(T... args) {

View File

@ -74,6 +74,10 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
private int WINDOW_SIZE = 3;
@Argument(fullName="smoothing", shortName="sm", required = false, doc="Number of imaginary counts to add to each bin in order to smooth out bins with few data points")
private int SMOOTHING = 1;
@Argument(fullName="validate_old_recalibrator", shortName="validateOldRecalibrator", required=false, doc="Match the output of the old recalibrator exactly. For debugging purposes only.")
private boolean VALIDATE_OLD_RECALIBRATOR = false;
@Argument(fullName="use_slx_platform", shortName="useSLXPlatform", required=false, doc="Force the platform to be Illumina regardless of what it actually says. For debugging purposes only.")
private boolean USE_SLX_PLATFORM = false;
//public enum RecalibrationMode {
// COMBINATORIAL,
@ -89,6 +93,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
private ArrayList<Covariate> requestedCovariates;
private static Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
private static Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
private static Pattern COVARIATE_PATTERN = Pattern.compile("^@!.*");
@ -133,7 +138,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
try {
for ( String line : new xReadLines(new File( RECAL_FILE )) ) {
lineNumber++;
if( COMMENT_PATTERN.matcher(line).matches() ) {
if( COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches()) {
; // skip over the comment lines, (which start with '#')
}
else if( COVARIATE_PATTERN.matcher(line).matches() ) { // the line string is either specifying a covariate or is giving csv data
@ -168,6 +173,12 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
} else { // found some data
if( !foundAllCovariates ) {
if( VALIDATE_OLD_RECALIBRATOR ) {
requestedCovariates.add( new ReadGroupCovariate() );
requestedCovariates.add( new QualityScoreCovariate() );
requestedCovariates.add( new CycleCovariate() );
requestedCovariates.add( new DinucCovariate() );
}
foundAllCovariates = true;
if(estimatedCapacity > 300 * 40 * 200 * 16) { estimatedCapacity = 300 * 40 * 200 * 16; } // Don't want to crash with out of heap space exception
final boolean createCollapsedTables = true;
@ -208,7 +219,17 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
int iii;
for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
cov = requestedCovariates.get( iii );
key.add( cov.getValue( vals[iii] ) );
if( VALIDATE_OLD_RECALIBRATOR ) {
if( iii == 1 ) { // order is different in the old recalibrator
key.add( cov.getValue( vals[2] ) );
} else if ( iii == 2 ) {
key.add( cov.getValue( vals[1] ) );
} else {
key.add( cov.getValue( vals[iii] ) );
}
} else {
key.add( cov.getValue( vals[iii] ) );
}
}
RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ) );
dataManager.addToAllTables( key, datum );
@ -231,10 +252,6 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
// WARNING: refBases is always null because this walker doesn't have @Requires({DataSource.REFERENCE_BASES})
// This is done in order to speed up the code
if( read.getMappingQuality() <= 0 ) {
return read; // early return here, unmapped reads and mapping quality zero reads should be left alone
}
byte[] originalQuals = read.getBaseQualities();
// Check if we need to use the original quality scores instead
@ -251,33 +268,30 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
// These calls are expensive so only do them once for each read
String readGroup = read.getReadGroup().getReadGroupId();
String platform = read.getReadGroup().getPlatform();
if( USE_SLX_PLATFORM ) {
platform = "ILLUMINA";
}
byte[] bases = read.getReadBases();
int startPos = 1;
int stopPos = read.getReadLength();
if( read.getReadNegativeStrandFlag() ) {
bases = BaseUtils.simpleComplement( bases );
startPos = 0;
stopPos = read.getReadLength() - 1;
}
// For each base in the read
for( int iii = 1; iii < read.getReadLength() - 1; iii++ ) { // skip first and last bases because there is no dinuc
for( int iii = startPos; iii < stopPos; iii++ ) { // skip first or last base because there is no dinuc depending on the direction of the read
List<Comparable> key = new ArrayList<Comparable>();
// Get the covariate values which make up the key
for( Covariate covariate : requestedCovariates ) {
key.add( covariate.getValue( read, iii, readGroup, platform, originalQuals, bases ) ); // offset is zero based so passing iii is correct here
}
recalQuals[iii] = performSequentialQualityCalculation( key );
//if( MODE_STRING.equalsIgnoreCase("COMBINATORIAL") ) { // BUGBUG: This isn't supported. No need to keep the full data hashmap around so it was removed for major speed up
// //RecalDatum datum = dataManager.data.get( key );
// //if( datum != null ) { // if we have data for this combination of covariates then recalibrate the quality score otherwise do nothing
// // recalQuals[iii] = datum.empiricalQualByte( SMOOTHING );
// //}
// throw new StingException("The Combinatorial mode isn't supported.");
//} else if( MODE_STRING.equalsIgnoreCase("SEQUENTIAL") ) {
//
//} else {
// throw new StingException( "Specified RecalibrationMode is not supported: " + MODE_STRING );
//}
}
preserveQScores( originalQuals, recalQuals ); // overwrite the work done if original quality score is too low
@ -340,11 +354,17 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
// The shift in quality due to each covariate by itself in turn
double deltaQCovariates = 0.0;
Double deltaQCovariateEmpirical;
double deltaQPos = 0.0;
double deltaQDinuc = 0.0;
for( int iii = 2; iii < key.size(); iii++ ) {
newKey.add( key.get(iii) ); // the given covariate
deltaQCovariateEmpirical = dataManager.getCollapsedDoubleTable(iii).get( newKey );
if( deltaQCovariateEmpirical != null ) {
deltaQCovariates += ( deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported) );
if( VALIDATE_OLD_RECALIBRATOR ) {
if(iii==2) { deltaQPos = deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported); } // BUGBUG: only here to validate against the old recalibrator
if(iii==3) { deltaQDinuc = deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported); }
}
}
newKey.remove( 2 ); // this new covariate is always added in at position 2 in the newKey list
}

View File

@ -82,6 +82,7 @@ public class CovariateCounterTest extends BaseTest {
read3 = ArtificialSAMUtils.createArtificialRead(header,"read3",1,1, bases1, quals3);
}
@Test
public void testCovariateCounterSetup() {
Assert.assertEquals("Number of read groups is wrong", c.getNReadGroups(), 2);
@ -179,4 +180,5 @@ public class CovariateCounterTest extends BaseTest {
c.updateDataFromRead(readGroup1, read, 0, (char)bases[0], false);
}
}

View File

@ -53,6 +53,7 @@ public class RecalDataTest extends BaseTest {
starDatum.B = starDatum.N = 1;
}
@Test
public void testBasic() {
logger.warn("Executing testIsBetween");
@ -178,4 +179,5 @@ public class RecalDataTest extends BaseTest {
}
Assert.assertEquals(indices.size(), 17);
}
}

View File

@ -0,0 +1,71 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
import org.broadinstitute.sting.WalkerTest;
import org.junit.Test;
import java.util.HashMap;
import java.util.Map;
import java.util.Arrays;
import java.util.List;
import java.io.File;
/**
 * Integration tests that run the refactored recalibrator walkers in
 * validate-old-recalibrator mode and compare output md5s against the
 * values produced by the original recalibrator.
 */
public class RefactoredRecalibrationWalkersIntegrationTest extends WalkerTest {

    // Maps each input BAM to the recalibration table file produced for it by
    // testCountCovariatesR, so testTableRecalibratorR can consume the same file.
    static HashMap<String, String> paramsFiles = new HashMap<String, String>();

    @Test
    public void testCountCovariatesR() {
        // Expected output md5 per input BAM
        HashMap<String, String> expectedMd5s = new HashMap<String, String>();
        expectedMd5s.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.SLX.SRP000031.2009_06.chr1.10_20mb.bam", "7be0b7a624d22187e712131f12aae546" );
        //expectedMd5s.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "" );
        expectedMd5s.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "dc1a1b6e99f4a47525cc1dce7b6eb1dc" );

        for ( Map.Entry<String, String> testCase : expectedMd5s.entrySet() ) {
            final String bamFile = testCase.getKey();
            final String expectedMd5 = testCase.getValue();
            WalkerTest.WalkerTestSpec testSpec = new WalkerTest.WalkerTestSpec(
                    "-R /broad/1KG/reference/human_b36_both.fasta" +
                    " --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" +
                    " -T CountCovariatesRefactored" +
                    " -I " + bamFile +
                    " -L 1:10,000,000-11,000,000" +
                    " --validate_old_recalibrator" +
                    " --use_slx_platform" +
                    " -recalFile %s",
                    1, // just one output file
                    Arrays.asList(expectedMd5));
            List<File> outputs = executeTest("testCountCovariatesR", testSpec).getFirst();
            // Remember where the recal table landed so the follow-on test can find it
            paramsFiles.put(bamFile, outputs.get(0).getAbsolutePath());
        }
    }

    @Test
    public void testTableRecalibratorR() {
        // Expected output md5 per input BAM
        HashMap<String, String> expectedMd5s = new HashMap<String, String>();
        expectedMd5s.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.SLX.SRP000031.2009_06.chr1.10_20mb.bam", "3ace4b9b8495429cc32e7008145f4996" );
        //expectedMd5s.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "" );
        //expectedMd5s.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "" );

        for ( Map.Entry<String, String> testCase : expectedMd5s.entrySet() ) {
            final String bamFile = testCase.getKey();
            final String expectedMd5 = testCase.getValue();
            // Reuse the recal table produced by testCountCovariatesR for this BAM;
            // skip this case if that test did not run (or did not cover this BAM).
            final String recalParamsFile = paramsFiles.get(bamFile);
            System.out.printf("PARAMS FOR %s is %s%n", bamFile, recalParamsFile);
            if ( recalParamsFile != null ) {
                WalkerTestSpec testSpec = new WalkerTestSpec(
                        "-R /broad/1KG/reference/human_b36_both.fasta" +
                        " --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" +
                        " -T TableRecalibrationRefactored" +
                        " -I " + bamFile +
                        " -L 1:10,000,000-20,000,000" +
                        " --validate_old_recalibrator" +
                        " --use_slx_platform" +
                        " -outputBam %s" +
                        " -recalFile " + recalParamsFile,
                        1, // just one output file
                        Arrays.asList(expectedMd5));
                executeTest("testTableRecalibratorR", testSpec);
            }
        }
    }
}