Added some documentation to the helper classes. Fixed an error case in TableRecalibrationWalker.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2046 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2009-11-14 18:13:43 +00:00
parent 15c14add4d
commit 672472789e
8 changed files with 27 additions and 10 deletions

View File

@ -390,6 +390,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
// For each Covariate in the key
for( Comparable comp : entry.getKey() ) {
// Output the Covariate's value
if( NO_PRINT_HEADER && comp instanceof String ) { continue; } // BUGBUG
recalTableStream.print( comp + "," );
}
// Output the RecalDatum entry

View File

@ -63,7 +63,7 @@ public class CycleCovariate implements Covariate {
}
return cycle;
} else if( platform.equalsIgnoreCase( "454" ) ) {
int cycle = 1;
int cycle = 0;
char prevBase = bases[0];
for( int iii = 1; iii <= offset; iii++ ) {
if(bases[iii] != prevBase) { // this base doesn't match the previous one so it is a new cycle
@ -74,7 +74,7 @@ public class CycleCovariate implements Covariate {
return cycle;
} else if( platform.equalsIgnoreCase( "SOLID" ) ) {
// the ligation cycle according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf
return (offset / 5) + 1; // integer division
return offset / 5; // integer division
} else {
throw new StingException( "Requested platform (" + platform + ") not supported in CycleCovariate." );
}

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.QualityUtils;
/*
* Copyright (c) 2009 The Broad Institute

View File

@ -34,6 +34,9 @@ import java.util.*;
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Oct 30, 2009
*
* A HashMap that maps a list of comparables to any object <T>.
The mappings can also be retrieved in sorted order.
*/
public class NHashMap<T> extends HashMap<List<? extends Comparable>, T> {
@ -113,9 +116,7 @@ public class NHashMap<T> extends HashMap<List<? extends Comparable>, T> {
return theSet;
}
public List<T> makeList(T... args) {
public static <T> List<T> makeList(T... args) {
List<T> list = new ArrayList<T>();
for (T arg : args)
{

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.QualityUtils;
/*
* Copyright (c) 2009 The Broad Institute

View File

@ -34,6 +34,8 @@ import java.util.*;
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Nov 6, 2009
*
* This helper class holds the data HashMap as well as submaps that represent the marginal distributions collapsed over all needed dimensions.
*/
public class RecalDataManager {
@ -42,9 +44,9 @@ public class RecalDataManager {
private NHashMap<RecalDatum> dataCollapsedQualityScore; // table where everything except read group and quality score has been collapsed
private ArrayList<NHashMap<RecalDatum>> dataCollapsedByCovariate; // tables where everything except read group, quality score, and given covariate has been collapsed
private boolean collapsedTablesCreated;
public NHashMap<Double> dataSumExpectedErrors;
public NHashMap<Double> dataSumExpectedErrors; // table used to calculate the overall aggregate quality score in which everything except read group is collapsed
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ";
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // the tag in a BAM file that holds the original quality scores
RecalDataManager() {
data = new NHashMap<RecalDatum>();
@ -58,6 +60,10 @@ public class RecalDataManager {
}
// BUGBUG: A lot going on in this method, doing a lot of pre-calculations for use in the sequential mode calculation later in TableRecalibrationWalker
/**
* Create all the collapsed tables that will be used in the sequential calculation in TableRecalibrationWalker
* @param numCovariates The number of covariates, which determines how many collapsed tables are created
*/
public final void createCollapsedTables( final int numCovariates ) {
dataCollapsedReadGroup = new NHashMap<RecalDatum>();
dataCollapsedQualityScore = new NHashMap<RecalDatum>();
@ -88,7 +94,8 @@ public class RecalDataManager {
} else {
collapsedDatum.increment( thisDatum );
}
// create dataSumExpectedErrors, the table used to calculate the overall aggregate quality score in which everything except read group is collapsed
newKey = new ArrayList<Comparable>();
newKey.add( key.get(0) ); // make a new key with just the read group
sumExpectedErrors = dataSumExpectedErrors.get( newKey );
@ -129,6 +136,11 @@ public class RecalDataManager {
collapsedTablesCreated = true;
}
/**
* Get the appropriate collapsed table out of the set of all the tables held by this Object
* @param covariate The index of the covariate that selects the desired collapsed HashMap
* @return The desired collapsed HashMap
*/
public final NHashMap<RecalDatum> getCollapsedTable( final int covariate ) {
if( !collapsedTablesCreated ) {
throw new StingException("Trying to get collapsed tables before they have been populated. Null pointers abound.");

View File

@ -34,6 +34,8 @@ import java.util.*;
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Nov 3, 2009
*
* An individual piece of recalibration data. Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
*/
public class RecalDatum {

View File

@ -230,6 +230,9 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
// These calls are expensive so only do them once for each read
String readGroup = read.getReadGroup().getReadGroupId();
char[] bases = read.getReadString().toCharArray();
if( refBases.length != bases.length ) {
return read; // something is wrong with the mapping of the read so leave it alone
}
String myRefBases = new String(refBases);
if( read.getReadNegativeStrandFlag() ) {
bases = BaseUtils.simpleComplement( read.getReadString() ).toCharArray();