Added some documentation to the code, mostly especially to CovariateCounterWalker but various comments added throughout. Also changed the HashMap data structure to accept an estimated initial capacity. This had a very modest improvement to the speed.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2001 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
74751a8ed3
commit
740a5484c4
|
|
@ -2,12 +2,41 @@ package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
* User: rpoplin
|
* User: rpoplin
|
||||||
* Date: Oct 30, 2009
|
* Date: Oct 30, 2009
|
||||||
|
*
|
||||||
|
* The Covariate interface. A Covariate is a feature used in the recalibration that can be picked out of the read and corresponding reference bases
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public interface Covariate {
|
public interface Covariate {
|
||||||
public Comparable<?> getValue(SAMRecord read, int offset, char[] refBases); // used to pick out the value from the read and etc
|
public Comparable getValue(SAMRecord read, int offset, char[] refBases); // used to pick out the value from the read and reference bases
|
||||||
public Comparable<?> getValue(String str); // used to get value from input file
|
public Comparable getValue(String str); // used to get value from input file
|
||||||
|
public int estimatedNumberOfBins(); // used to estimate the amount space required in the hashmap
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -16,10 +16,47 @@ import java.util.*;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
* User: rpoplin
|
* User: rpoplin
|
||||||
* Date: Nov 3, 2009
|
* Date: Nov 3, 2009
|
||||||
|
*
|
||||||
|
* This walker is designed to work as the first pass in a two-pass processing step.
|
||||||
|
* It does a by-locus traversal operating only at sites that are not in dbSNP.
|
||||||
|
* We assume that all mismatches we see are therefore errors.
|
||||||
|
* This walker generates tables based on various user-specified covariates (such as read group, reported quality score, cycle, and dinuc)
|
||||||
|
* Since there is a large amount of data one can then calculate an empirical probability of error
|
||||||
|
* given the particular covariates seen at this site, where p(error) = num mismatches / num observations
|
||||||
|
* The output file is a CSV list of (several covariate values, num observations, num mismatches, empirical quality score)
|
||||||
|
* The first lines of the output file give the name of the covariate classes that were used for this calculation.
|
||||||
|
*
|
||||||
|
* Note: ReadGroupCovariate and QualityScoreCovariate are required covariates and are automatically added first.
|
||||||
|
* Note: This walker is designed to be used in conjunction with TableRecalibrationWalker.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@WalkerName("CountCovariatesRefactored")
|
@WalkerName("CountCovariatesRefactored")
|
||||||
|
|
@ -36,15 +73,18 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
||||||
@Argument(fullName="recal_file", shortName="rf", required=false, doc="Filename for the outputted covariates table recalibration file")
|
@Argument(fullName="recal_file", shortName="rf", required=false, doc="Filename for the outputted covariates table recalibration file")
|
||||||
public String RECAL_FILE = "output.recal_data.csv";
|
public String RECAL_FILE = "output.recal_data.csv";
|
||||||
|
|
||||||
protected static RecalDataManager dataManager;
|
protected static RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
|
||||||
protected static ArrayList<Covariate> requestedCovariates;
|
protected static ArrayList<Covariate> requestedCovariates; // A list to hold the covariate objects that were requested
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse the -cov arguments and create a list of covariates to be used here
|
||||||
|
* Based on the covariates' estimates for initial capacity allocate the data hashmap
|
||||||
|
*/
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
|
|
||||||
dataManager = new RecalDataManager();
|
|
||||||
|
|
||||||
// Get a list of all available covariates
|
// Get a list of all available covariates
|
||||||
List<Class<? extends Covariate>> classes = PackageUtils.getClassesImplementingInterface(Covariate.class);
|
List<Class<? extends Covariate>> classes = PackageUtils.getClassesImplementingInterface(Covariate.class);
|
||||||
|
int estimatedCapacity = 1; // start at one because capacity is multiplicative for each covariate
|
||||||
|
|
||||||
// Print and exit if that's what was requested
|
// Print and exit if that's what was requested
|
||||||
if ( LIST_ONLY ) {
|
if ( LIST_ONLY ) {
|
||||||
|
|
@ -57,7 +97,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
||||||
System.exit( 0 ); // early exit here because user requested it
|
System.exit( 0 ); // early exit here because user requested it
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize the requested covariates
|
// Initialize the requested covariates by parsing the -cov argument
|
||||||
requestedCovariates = new ArrayList<Covariate>();
|
requestedCovariates = new ArrayList<Covariate>();
|
||||||
requestedCovariates.add( new ReadGroupCovariate() ); // Read Group Covariate is a required covariate for the recalibration calculation
|
requestedCovariates.add( new ReadGroupCovariate() ); // Read Group Covariate is a required covariate for the recalibration calculation
|
||||||
requestedCovariates.add( new QualityScoreCovariate() ); // Quality Score Covariate is a required covariate for the recalibration calculation
|
requestedCovariates.add( new QualityScoreCovariate() ); // Quality Score Covariate is a required covariate for the recalibration calculation
|
||||||
|
|
@ -67,11 +107,13 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
||||||
boolean foundClass = false;
|
boolean foundClass = false;
|
||||||
for( Class<?> covClass : classes ) {
|
for( Class<?> covClass : classes ) {
|
||||||
|
|
||||||
if( requestedCovariateString.equalsIgnoreCase( covClass.getSimpleName() ) ) {
|
if( requestedCovariateString.equalsIgnoreCase( covClass.getSimpleName() ) ) { // -cov argument matches the class name for an implementing class
|
||||||
foundClass = true;
|
foundClass = true;
|
||||||
try {
|
try {
|
||||||
Covariate covariate = (Covariate)covClass.newInstance();
|
Covariate covariate = (Covariate)covClass.newInstance();
|
||||||
requestedCovariates.add( covariate );
|
requestedCovariates.add( covariate );
|
||||||
|
estimatedCapacity *= covariate.estimatedNumberOfBins(); // update the estimated initial capacity
|
||||||
|
|
||||||
if (covariate instanceof ReadGroupCovariate || covariate instanceof QualityScoreCovariate) {
|
if (covariate instanceof ReadGroupCovariate || covariate instanceof QualityScoreCovariate) {
|
||||||
throw new StingException( "ReadGroupCovariate and QualityScoreCovariate are required covariates and are therefore added for you. Please remove them from the -cov list" );
|
throw new StingException( "ReadGroupCovariate and QualityScoreCovariate are required covariates and are therefore added for you. Please remove them from the -cov list" );
|
||||||
}
|
}
|
||||||
|
|
@ -92,27 +134,48 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
||||||
|
|
||||||
logger.info( "The covariates being used here: " );
|
logger.info( "The covariates being used here: " );
|
||||||
logger.info( requestedCovariates );
|
logger.info( requestedCovariates );
|
||||||
|
|
||||||
|
dataManager = new RecalDataManager( estimatedCapacity );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// map
|
||||||
|
//
|
||||||
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For each read at this locus get the various covariate values and increment that location in the map based on
|
||||||
|
* whether or not the base matches the reference at this particular location
|
||||||
|
* @param tracker the reference metadata tracker
|
||||||
|
* @param ref the reference context
|
||||||
|
* @param context the alignment context
|
||||||
|
* @return returns 1, but this value isn't used in the reduce step
|
||||||
|
*/
|
||||||
public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
||||||
|
|
||||||
rodDbSNP dbsnp = rodDbSNP.getFirstRealSNP(tracker.getTrackData("dbsnp", null));
|
rodDbSNP dbsnp = rodDbSNP.getFirstRealSNP(tracker.getTrackData("dbsnp", null));
|
||||||
|
|
||||||
// Only use data from non-dbsnp sites
|
// Only use data from non-dbsnp sites
|
||||||
// Assume every mismatch at a non-dbsnp site is indicitive of poor quality
|
// Assume every mismatch at a non-dbsnp site is indicitive of poor quality
|
||||||
if( dbsnp == null ) {
|
if( dbsnp == null ) {
|
||||||
List<SAMRecord> reads = context.getReads();
|
List<SAMRecord> reads = context.getReads();
|
||||||
List<Integer> offsets = context.getOffsets();
|
List<Integer> offsets = context.getOffsets();
|
||||||
SAMRecord read; // preallocate for use in for loop below
|
SAMRecord read;
|
||||||
int offset; // preallocate for use in for loop below
|
int offset;
|
||||||
|
|
||||||
|
// For each read at this locus
|
||||||
for( int iii = 0; iii < reads.size(); iii++ ) {
|
for( int iii = 0; iii < reads.size(); iii++ ) {
|
||||||
read = reads.get(iii);
|
read = reads.get(iii);
|
||||||
offset = offsets.get(iii);
|
offset = offsets.get(iii);
|
||||||
|
|
||||||
// Only use data from reads with mapping quality above given quality value and base quality greater than zero
|
// Only use data from reads with mapping quality above specified quality value and base quality greater than zero
|
||||||
byte[] quals = read.getBaseQualities();
|
byte[] quals = read.getBaseQualities();
|
||||||
if( read.getMappingQuality() >= MIN_MAPPING_QUALITY && quals[offset] > 0)
|
if( read.getMappingQuality() >= MIN_MAPPING_QUALITY && quals[offset] > 0)
|
||||||
{
|
{
|
||||||
if( offset > 0 && offset < (read.getReadLength() - 1) ) { // skip first and last bases because they don't have a dinuc count
|
// Skip first and last bases because they don't have a dinuc count
|
||||||
|
if( offset > 0 && offset < (read.getReadLength() - 1) ) {
|
||||||
updateDataFromRead(read, offset, ref);
|
updateDataFromRead(read, offset, ref);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -124,43 +187,69 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Major workhorse routine for this walker.
|
||||||
|
* Loop through the list of requested covariates and pick out the value from the read, offset, and reference
|
||||||
|
* Using the list of covariate values as a key, pick out the RecalDatum and increment,
|
||||||
|
* adding one to the number of observations and potentially one to the number of mismatches
|
||||||
|
* @param read the read
|
||||||
|
* @param offset the offset in the read for this locus
|
||||||
|
* @param ref the reference context
|
||||||
|
*/
|
||||||
private void updateDataFromRead(SAMRecord read, int offset, ReferenceContext ref) {
|
private void updateDataFromRead(SAMRecord read, int offset, ReferenceContext ref) {
|
||||||
|
|
||||||
List<Comparable<?>> key = new ArrayList<Comparable<?>>();
|
List<Comparable> key = new ArrayList<Comparable>();
|
||||||
Comparable<?> keyElement; // preallocate for use in for loop below
|
Comparable keyElement;
|
||||||
boolean badKey = false;
|
boolean badKey = false;
|
||||||
|
|
||||||
|
// Loop through the list of requested covariates and pick out the value from the read, offset, and reference
|
||||||
for( Covariate covariate : requestedCovariates ) {
|
for( Covariate covariate : requestedCovariates ) {
|
||||||
keyElement = covariate.getValue( read, offset, ref.getBases() );
|
keyElement = covariate.getValue( read, offset, ref.getBases() );
|
||||||
if( keyElement != null ) {
|
if( keyElement != null ) {
|
||||||
key.add( keyElement );
|
key.add( keyElement );
|
||||||
} else {
|
} else {
|
||||||
badKey = true;
|
badKey = true; // covariate returned bad value, for example dinuc returns null because base = 'N'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Using the list of covariate values as a key, pick out the RecalDatum
|
||||||
RecalDatum datum = null;
|
RecalDatum datum = null;
|
||||||
if( !badKey ) {
|
if( !badKey ) {
|
||||||
datum = dataManager.data.get( key );
|
datum = dataManager.data.get( key );
|
||||||
if( datum == null ) { // key doesn't exist yet in the map so make a new bucket and add it
|
if( datum == null ) { // key doesn't exist yet in the map so make a new bucket and add it
|
||||||
datum = new RecalDatum();
|
datum = new RecalDatum(); // initialized with zeros, will be incremented at end of method
|
||||||
dataManager.data.put( key, datum );
|
dataManager.data.put( key, datum );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Need the bases to determine whether or not we have a mismatch
|
||||||
byte[] bases = read.getReadBases();
|
byte[] bases = read.getReadBases();
|
||||||
|
|
||||||
char base = (char)bases[offset];
|
char base = (char)bases[offset];
|
||||||
char refBase = ref.getBase();
|
char refBase = ref.getBase();
|
||||||
|
// Get the complement base strand if we are a negative direction read
|
||||||
if ( read.getReadNegativeStrandFlag() ) {
|
if ( read.getReadNegativeStrandFlag() ) {
|
||||||
refBase = BaseUtils.simpleComplement( refBase );
|
refBase = BaseUtils.simpleComplement( refBase );
|
||||||
base = BaseUtils.simpleComplement( base );
|
base = BaseUtils.simpleComplement( base );
|
||||||
}
|
}
|
||||||
|
|
||||||
if( datum != null ) {
|
if( datum != null ) {
|
||||||
|
// Add one to the number of observations and potentially one to the number of mismatches
|
||||||
datum.increment( base, refBase );
|
datum.increment( base, refBase );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// reduce
|
||||||
|
//
|
||||||
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize the reudce step by creating a PrintStream from the filename specified as an argument to the walker.
|
||||||
|
* @return returns a PrintStream created from the -rf filename
|
||||||
|
*/
|
||||||
public PrintStream reduceInit() {
|
public PrintStream reduceInit() {
|
||||||
try {
|
try {
|
||||||
return new PrintStream( RECAL_FILE );
|
return new PrintStream( RECAL_FILE );
|
||||||
|
|
@ -169,10 +258,20 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The Reduce method doesn't do anything for this walker.
|
||||||
|
* @param value result of the map.
|
||||||
|
* @param recalTableStream the PrintStream
|
||||||
|
* @return returns the PrintStream used to output the CSV data
|
||||||
|
*/
|
||||||
public PrintStream reduce( Integer value, PrintStream recalTableStream ) {
|
public PrintStream reduce( Integer value, PrintStream recalTableStream ) {
|
||||||
return recalTableStream; // nothing to do here
|
return recalTableStream; // nothing to do here
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write out the full data hashmap to disk in CSV format
|
||||||
|
* @param recalTableStream the PrintStream to write out to
|
||||||
|
*/
|
||||||
public void onTraversalDone( PrintStream recalTableStream ) {
|
public void onTraversalDone( PrintStream recalTableStream ) {
|
||||||
out.print( "Writing raw recalibration data..." );
|
out.print( "Writing raw recalibration data..." );
|
||||||
for( Covariate cov : requestedCovariates ) {
|
for( Covariate cov : requestedCovariates ) {
|
||||||
|
|
@ -184,16 +283,19 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
||||||
recalTableStream.close();
|
recalTableStream.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For each entry (key-value pair) in the data hashmap output the Covariate's values as well as the RecalDatum's data in CSV format
|
||||||
|
* @param recalTableStream the PrintStream to write out to
|
||||||
|
*/
|
||||||
private void outputToCSV( PrintStream recalTableStream ) {
|
private void outputToCSV( PrintStream recalTableStream ) {
|
||||||
for( Map.Entry<List<? extends Comparable<?>>, RecalDatum> entry : dataManager.data.entrySet() ) {
|
// For each entry in the data hashmap
|
||||||
//for( int iii = 0; iii < entry.getKey().size(); iii++ ) {
|
for( Map.Entry<List<? extends Comparable>, RecalDatum> entry : dataManager.data.entrySet() ) {
|
||||||
//index = Integer.parseInt( (entry.getKey().get( iii )).toString());
|
// For each Covariate in the key
|
||||||
//recalTableStream.print( requestedCovariates.get( iii ).getValue( index ) + "," );
|
for( Comparable comp : entry.getKey() ) {
|
||||||
// recalTableStream.print( entry.getKey().get(iii) )
|
// Output the Covariate's value
|
||||||
//}
|
|
||||||
for( Comparable<?> comp : entry.getKey() ) {
|
|
||||||
recalTableStream.print( comp + "," );
|
recalTableStream.print( comp + "," );
|
||||||
}
|
}
|
||||||
|
// Output the RecalDatum entry
|
||||||
recalTableStream.println( entry.getValue().outputToCSV() );
|
recalTableStream.println( entry.getValue().outputToCSV() );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,16 +2,42 @@ package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
* User: rpoplin
|
* User: rpoplin
|
||||||
* Date: Oct 30, 2009
|
* Date: Oct 30, 2009
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class CycleCovariate implements Covariate {
|
public class CycleCovariate implements Covariate {
|
||||||
|
|
||||||
public String platform;
|
public String platform;
|
||||||
|
|
||||||
public CycleCovariate() { // empty constructor is required by CovariateCounterWalker
|
public CycleCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||||
platform = null;
|
platform = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -19,8 +45,8 @@ public class CycleCovariate implements Covariate {
|
||||||
platform = _platform;
|
platform = _platform;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Comparable<?> getValue(SAMRecord read, int offset, char[] refBases) {
|
public Comparable getValue(SAMRecord read, int offset, char[] refBases) {
|
||||||
//BUGBUG: assumes Solexia platform
|
//BUGBUG: assumes Solexa platform
|
||||||
Integer cycle = offset;
|
Integer cycle = offset;
|
||||||
if( read.getReadNegativeStrandFlag() ) {
|
if( read.getReadNegativeStrandFlag() ) {
|
||||||
cycle = read.getReadLength() - (offset + 1);
|
cycle = read.getReadLength() - (offset + 1);
|
||||||
|
|
@ -28,10 +54,14 @@ public class CycleCovariate implements Covariate {
|
||||||
return cycle;
|
return cycle;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Comparable<?> getValue(String str) {
|
public Comparable getValue(String str) {
|
||||||
return Integer.parseInt( str );
|
return Integer.parseInt( str );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int estimatedNumberOfBins() {
|
||||||
|
return 100;
|
||||||
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "Cycle";
|
return "Cycle";
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,16 +5,42 @@ import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
* User: rpoplin
|
* User: rpoplin
|
||||||
* Date: Nov 3, 2009
|
* Date: Nov 3, 2009
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class DinucCovariate implements Covariate {
|
public class DinucCovariate implements Covariate {
|
||||||
|
|
||||||
public static ArrayList<String> BASES;
|
public static ArrayList<String> BASES;
|
||||||
|
|
||||||
public DinucCovariate() { // empty constructor is required by CovariateCounterWalker
|
public DinucCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||||
BASES = new ArrayList<String>();
|
BASES = new ArrayList<String>();
|
||||||
BASES.add("A");
|
BASES.add("A");
|
||||||
BASES.add("G");
|
BASES.add("G");
|
||||||
|
|
@ -26,7 +52,7 @@ public class DinucCovariate implements Covariate {
|
||||||
BASES.add("t");
|
BASES.add("t");
|
||||||
}
|
}
|
||||||
|
|
||||||
public Comparable<?> getValue(SAMRecord read, int offset, char[] refBases) {
|
public Comparable getValue(SAMRecord read, int offset, char[] refBases) {
|
||||||
byte[] bases = read.getReadBases();
|
byte[] bases = read.getReadBases();
|
||||||
char base = (char)bases[offset];
|
char base = (char)bases[offset];
|
||||||
char prevBase = (char)bases[offset - 1];
|
char prevBase = (char)bases[offset - 1];
|
||||||
|
|
@ -36,16 +62,20 @@ public class DinucCovariate implements Covariate {
|
||||||
}
|
}
|
||||||
// Check if bad base, probably an 'N'
|
// Check if bad base, probably an 'N'
|
||||||
if( !BASES.contains( String.format( "%c", prevBase ) ) || !BASES.contains( String.format( "%c", base) ) ) {
|
if( !BASES.contains( String.format( "%c", prevBase ) ) || !BASES.contains( String.format( "%c", base) ) ) {
|
||||||
return null; // CovariateCounterWalker and TableRecalibrationWalker will recognize that null means skip this particular location in the read
|
return null; // CovariateCounterWalker will recognize that null means skip this particular location in the read
|
||||||
} else {
|
} else {
|
||||||
return String.format("%c%c", prevBase, base);
|
return String.format("%c%c", prevBase, base);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public Comparable<?> getValue(String str) {
|
public Comparable getValue(String str) {
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int estimatedNumberOfBins() {
|
||||||
|
return 16;
|
||||||
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "Dinucleotide";
|
return "Dinucleotide";
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,23 +2,53 @@ package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
* User: rpoplin
|
* User: rpoplin
|
||||||
* Date: Nov 4, 2009
|
* Date: Nov 4, 2009
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class MappingQualityCovariate implements Covariate {
|
public class MappingQualityCovariate implements Covariate {
|
||||||
|
|
||||||
public MappingQualityCovariate() { // empty constructor is required by CovariateCounterWalker
|
public MappingQualityCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||||
}
|
}
|
||||||
|
|
||||||
public Comparable<?> getValue(SAMRecord read, int offset, char[] refBases) {
|
public Comparable getValue(SAMRecord read, int offset, char[] refBases) {
|
||||||
return (Integer)(read.getMappingQuality());
|
return (Integer)(read.getMappingQuality()); // cast to Object Integer is required so that return values from the two getValue methods hash to the same code
|
||||||
}
|
}
|
||||||
|
|
||||||
public Comparable<?> getValue(String str) {
|
public Comparable getValue(String str) {
|
||||||
return Integer.parseInt( str );
|
return Integer.parseInt( str );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int estimatedNumberOfBins() {
|
||||||
|
return 100;
|
||||||
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "Mapping Quality Score";
|
return "Mapping Quality Score";
|
||||||
|
|
|
||||||
|
|
@ -3,17 +3,43 @@ package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
* User: rpoplin
|
* User: rpoplin
|
||||||
* Date: Nov 4, 2009
|
* Date: Nov 4, 2009
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class MinimumNQSCovariate implements Covariate {
|
public class MinimumNQSCovariate implements Covariate {
|
||||||
|
|
||||||
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ";
|
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ";
|
||||||
protected boolean USE_ORIGINAL_QUALS;
|
protected boolean USE_ORIGINAL_QUALS;
|
||||||
|
|
||||||
public MinimumNQSCovariate() { // empty constructor is required by CovariateCounterWalker
|
public MinimumNQSCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||||
USE_ORIGINAL_QUALS = false;
|
USE_ORIGINAL_QUALS = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -21,7 +47,7 @@ public class MinimumNQSCovariate implements Covariate {
|
||||||
USE_ORIGINAL_QUALS = originalQuals;
|
USE_ORIGINAL_QUALS = originalQuals;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Comparable<?> getValue(SAMRecord read, int offset, char[] refBases) {
|
public Comparable getValue(SAMRecord read, int offset, char[] refBases) {
|
||||||
byte[] quals = read.getBaseQualities();
|
byte[] quals = read.getBaseQualities();
|
||||||
if ( USE_ORIGINAL_QUALS && read.getAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG) != null ) {
|
if ( USE_ORIGINAL_QUALS && read.getAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG) != null ) {
|
||||||
Object obj = read.getAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG);
|
Object obj = read.getAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG);
|
||||||
|
|
@ -41,10 +67,14 @@ public class MinimumNQSCovariate implements Covariate {
|
||||||
return minQual;
|
return minQual;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Comparable<?> getValue(String str) {
|
public Comparable getValue(String str) {
|
||||||
return Integer.parseInt( str );
|
return Integer.parseInt( str );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int estimatedNumberOfBins() {
|
||||||
|
return 40;
|
||||||
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "Minimum Neighborhood Quality Score";
|
return "Minimum Neighborhood Quality Score";
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,18 +2,93 @@ package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
* User: rpoplin
|
* User: rpoplin
|
||||||
* Date: Oct 30, 2009
|
* Date: Oct 30, 2009
|
||||||
*/
|
*/
|
||||||
public class NHashMap<T> extends HashMap<List<? extends Comparable<?>>, T> {
|
|
||||||
|
public class NHashMap<T> extends HashMap<List<? extends Comparable>, T> {
|
||||||
|
|
||||||
private static final long serialVersionUID = 1L; //BUGBUG: what should I do here?
|
private static final long serialVersionUID = 1L; //BUGBUG: what should I do here?
|
||||||
|
private ArrayList<ArrayList<Comparable>> keyLists;
|
||||||
|
|
||||||
|
public NHashMap() {
|
||||||
|
super();
|
||||||
|
keyLists = new ArrayList<ArrayList<Comparable>>();
|
||||||
|
}
|
||||||
|
|
||||||
public static <T extends Comparable<?>> List<T> makeList(T... args) {
|
public NHashMap( int initialCapacity, float loadingFactor ) {
|
||||||
|
super( initialCapacity, loadingFactor );
|
||||||
|
keyLists = new ArrayList<ArrayList<Comparable>>();
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
|
||||||
|
// This method is here only to help facilitate direct comparison of old and refactored recalibrator.
|
||||||
|
// The old recalibrator prints out it's mappings in a sorted order but the refactored recalibrator doesn't need to.
|
||||||
|
// Comment out this overriding method to improve performance
|
||||||
|
public T put(List<? extends Comparable> key, T value) {
|
||||||
|
|
||||||
|
ArrayList<Comparable> thisList;
|
||||||
|
for( int iii = 0; iii < key.size(); iii++ ) {
|
||||||
|
thisList = keyLists.get( iii );
|
||||||
|
if( thisList == null ) {
|
||||||
|
thisList = new ArrayList<Comparable>();
|
||||||
|
}
|
||||||
|
if( !thisList.contains( key.get( iii ) ) ) {
|
||||||
|
thisList.add( key.get(iii ) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return super.put( key, value );
|
||||||
|
}
|
||||||
|
|
||||||
|
// This method is here only to help facilitate direct comparison of old and refactored recalibrator.
|
||||||
|
// The old recalibrator prints out it's mappings in a sorted order but the refactored recalibrator doesn't need to.
|
||||||
|
// Comment out this overriding method to improve performance
|
||||||
|
public Set<Map.Entry<List<? extends Comparable>, T>> entrySet() {
|
||||||
|
|
||||||
|
for( ArrayList<Comparable> list : keyLists ) {
|
||||||
|
Collections.sort(list);
|
||||||
|
}
|
||||||
|
|
||||||
|
for( ArrayList<Comparable> list : keyLists ) {
|
||||||
|
for( Comparable comp : list ) {
|
||||||
|
// BUGBUG: unfinished
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
public static <T extends Comparable> List<T> makeList(T... args) {
|
||||||
List<T> list = new ArrayList<T>();
|
List<T> list = new ArrayList<T>();
|
||||||
for (T arg : args)
|
for (T arg : args)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -3,17 +3,43 @@ package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
* User: rpoplin
|
* User: rpoplin
|
||||||
* Date: Nov 3, 2009
|
* Date: Nov 3, 2009
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class QualityScoreCovariate implements Covariate {
|
public class QualityScoreCovariate implements Covariate {
|
||||||
|
|
||||||
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ";
|
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ";
|
||||||
protected boolean USE_ORIGINAL_QUALS;
|
protected boolean USE_ORIGINAL_QUALS;
|
||||||
|
|
||||||
public QualityScoreCovariate() { // empty constructor is required by CovariateCounterWalker
|
public QualityScoreCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||||
USE_ORIGINAL_QUALS = false;
|
USE_ORIGINAL_QUALS = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -21,7 +47,7 @@ public class QualityScoreCovariate implements Covariate {
|
||||||
USE_ORIGINAL_QUALS = originalQuals;
|
USE_ORIGINAL_QUALS = originalQuals;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Comparable<?> getValue(SAMRecord read, int offset, char[] refBases) {
|
public Comparable getValue(SAMRecord read, int offset, char[] refBases) {
|
||||||
byte[] quals = read.getBaseQualities();
|
byte[] quals = read.getBaseQualities();
|
||||||
if ( USE_ORIGINAL_QUALS && read.getAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG) != null ) {
|
if ( USE_ORIGINAL_QUALS && read.getAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG) != null ) {
|
||||||
Object obj = read.getAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG);
|
Object obj = read.getAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG);
|
||||||
|
|
@ -32,12 +58,16 @@ public class QualityScoreCovariate implements Covariate {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return ((Integer)((int)quals[offset]));
|
return ((Integer)((int)quals[offset])); // cast to Object Integer is required so that return values from the two getValue methods hash to the same code
|
||||||
}
|
}
|
||||||
|
|
||||||
public Comparable<?> getValue(String str) {
|
public Comparable getValue(String str) {
|
||||||
return Integer.parseInt( str );
|
return Integer.parseInt( str );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int estimatedNumberOfBins() {
|
||||||
|
return 40;
|
||||||
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "Reported Quality Score";
|
return "Reported Quality Score";
|
||||||
|
|
|
||||||
|
|
@ -2,24 +2,54 @@ package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
* User: rpoplin
|
* User: rpoplin
|
||||||
* Date: Oct 30, 2009
|
* Date: Oct 30, 2009
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class ReadGroupCovariate implements Covariate{
|
public class ReadGroupCovariate implements Covariate{
|
||||||
|
|
||||||
public ReadGroupCovariate() { // empty constructor is required by CovariateCounterWalker
|
public ReadGroupCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||||
}
|
}
|
||||||
|
|
||||||
public Comparable<?> getValue(SAMRecord read, int offset, char[] refBases) {
|
public Comparable getValue(SAMRecord read, int offset, char[] refBases) {
|
||||||
return read.getReadGroup().getReadGroupId();
|
return read.getReadGroup().getReadGroupId();
|
||||||
}
|
}
|
||||||
|
|
||||||
public Comparable<?> getValue(String str) {
|
public Comparable getValue(String str) {
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int estimatedNumberOfBins() {
|
||||||
|
return 300;
|
||||||
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "Read Group";
|
return "Read Group";
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,31 @@ import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
* User: rpoplin
|
* User: rpoplin
|
||||||
|
|
@ -19,8 +44,9 @@ public class RecalDataManager {
|
||||||
public boolean collapsedTablesCreated;
|
public boolean collapsedTablesCreated;
|
||||||
public NHashMap<Double> dataSumExpectedErrors;
|
public NHashMap<Double> dataSumExpectedErrors;
|
||||||
|
|
||||||
RecalDataManager() {
|
RecalDataManager( int estimatedCapacity ) {
|
||||||
data = new NHashMap<RecalDatum>();
|
data = new NHashMap<RecalDatum>( estimatedCapacity, 0.95f ); // second arg is the 'loading factor',
|
||||||
|
// a number to monkey around with when optimizing performace of the HashMap
|
||||||
collapsedTablesCreated = false;
|
collapsedTablesCreated = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -37,17 +63,17 @@ public class RecalDataManager {
|
||||||
// preallocate for use in for loops below
|
// preallocate for use in for loops below
|
||||||
RecalDatum thisDatum;
|
RecalDatum thisDatum;
|
||||||
RecalDatum collapsedDatum;
|
RecalDatum collapsedDatum;
|
||||||
List<? extends Comparable<?>> key;
|
List<? extends Comparable> key;
|
||||||
ArrayList<Comparable<?>> newKey;
|
ArrayList<Comparable> newKey;
|
||||||
Double sumExpectedErrors;
|
Double sumExpectedErrors;
|
||||||
|
|
||||||
// for every data point in the map
|
// for every data point in the map
|
||||||
for( Map.Entry<List<? extends Comparable<?>>,RecalDatum> entry : data.entrySet() ) {
|
for( Map.Entry<List<? extends Comparable>,RecalDatum> entry : data.entrySet() ) {
|
||||||
thisDatum = entry.getValue();
|
thisDatum = entry.getValue();
|
||||||
key = entry.getKey();
|
key = entry.getKey();
|
||||||
|
|
||||||
// create dataCollapsedReadGroup, the table where everything except read group has been collapsed
|
// create dataCollapsedReadGroup, the table where everything except read group has been collapsed
|
||||||
newKey = new ArrayList<Comparable<?>>();
|
newKey = new ArrayList<Comparable>();
|
||||||
newKey.add( key.get(0) ); // make a new key with just the read group
|
newKey.add( key.get(0) ); // make a new key with just the read group
|
||||||
collapsedDatum = dataCollapsedReadGroup.get( newKey );
|
collapsedDatum = dataCollapsedReadGroup.get( newKey );
|
||||||
if( collapsedDatum == null ) {
|
if( collapsedDatum == null ) {
|
||||||
|
|
@ -57,7 +83,7 @@ public class RecalDataManager {
|
||||||
collapsedDatum.increment( thisDatum );
|
collapsedDatum.increment( thisDatum );
|
||||||
}
|
}
|
||||||
|
|
||||||
newKey = new ArrayList<Comparable<?>>();
|
newKey = new ArrayList<Comparable>();
|
||||||
newKey.add( key.get(0) ); // make a new key with just the read group
|
newKey.add( key.get(0) ); // make a new key with just the read group
|
||||||
sumExpectedErrors = dataSumExpectedErrors.get( newKey );
|
sumExpectedErrors = dataSumExpectedErrors.get( newKey );
|
||||||
if( sumExpectedErrors == null ) {
|
if( sumExpectedErrors == null ) {
|
||||||
|
|
@ -69,7 +95,7 @@ public class RecalDataManager {
|
||||||
dataSumExpectedErrors.put( newKey, sumExpectedErrors );
|
dataSumExpectedErrors.put( newKey, sumExpectedErrors );
|
||||||
}
|
}
|
||||||
|
|
||||||
newKey = new ArrayList<Comparable<?>>();
|
newKey = new ArrayList<Comparable>();
|
||||||
// create dataCollapsedQuality, the table where everything except read group and quality score has been collapsed
|
// create dataCollapsedQuality, the table where everything except read group and quality score has been collapsed
|
||||||
newKey.add( key.get(0) ); // make a new key with the read group ...
|
newKey.add( key.get(0) ); // make a new key with the read group ...
|
||||||
newKey.add( key.get(1) ); // and quality score
|
newKey.add( key.get(1) ); // and quality score
|
||||||
|
|
@ -83,7 +109,7 @@ public class RecalDataManager {
|
||||||
|
|
||||||
// create dataCollapsedByCovariate's, the tables where everything except read group, quality score, and given covariate has been collapsed
|
// create dataCollapsedByCovariate's, the tables where everything except read group, quality score, and given covariate has been collapsed
|
||||||
for( int iii = 0; iii < numCovariates - 2; iii++ ) { // readGroup and QualityScore aren't counted
|
for( int iii = 0; iii < numCovariates - 2; iii++ ) { // readGroup and QualityScore aren't counted
|
||||||
newKey = new ArrayList<Comparable<?>>();
|
newKey = new ArrayList<Comparable>();
|
||||||
newKey.add( key.get(0) ); // make a new key with the read group ...
|
newKey.add( key.get(0) ); // make a new key with the read group ...
|
||||||
newKey.add( key.get(1) ); // and quality score ...
|
newKey.add( key.get(1) ); // and quality score ...
|
||||||
newKey.add( key.get(iii + 2) ); // and the given covariate
|
newKey.add( key.get(iii + 2) ); // and the given covariate
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,31 @@ import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
* User: rpoplin
|
* User: rpoplin
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,31 @@ import java.util.regex.Pattern;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
* User: rpoplin
|
* User: rpoplin
|
||||||
|
|
@ -56,14 +81,14 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
|
|
||||||
int lineNumber = 0;
|
int lineNumber = 0;
|
||||||
boolean foundAllCovariates = false;
|
boolean foundAllCovariates = false;
|
||||||
dataManager = new RecalDataManager();
|
int estimatedCapacity = 1;
|
||||||
|
|
||||||
// Read in the covariates that were used from the input file
|
// Read in the covariates that were used from the input file
|
||||||
requestedCovariates = new ArrayList<Covariate>();
|
requestedCovariates = new ArrayList<Covariate>();
|
||||||
|
|
||||||
|
|
||||||
// Read in the data from the csv file and populate the map
|
// Read in the data from the csv file and populate the map
|
||||||
out.print( "Reading in the data from input file..." );
|
logger.info( "Reading in the data from input file..." );
|
||||||
|
|
||||||
try {
|
try {
|
||||||
for ( String line : new xReadLines(new File( RECAL_FILE )) ) {
|
for ( String line : new xReadLines(new File( RECAL_FILE )) ) {
|
||||||
|
|
@ -80,6 +105,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
try {
|
try {
|
||||||
Covariate covariate = (Covariate)covClass.newInstance();
|
Covariate covariate = (Covariate)covClass.newInstance();
|
||||||
requestedCovariates.add( covariate );
|
requestedCovariates.add( covariate );
|
||||||
|
estimatedCapacity *= covariate.estimatedNumberOfBins();
|
||||||
|
|
||||||
} catch ( InstantiationException e ) {
|
} catch ( InstantiationException e ) {
|
||||||
throw new StingException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) );
|
throw new StingException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) );
|
||||||
} catch ( IllegalAccessException e ) {
|
} catch ( IllegalAccessException e ) {
|
||||||
|
|
@ -100,6 +127,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
foundAllCovariates = true;
|
foundAllCovariates = true;
|
||||||
logger.info( "The covariates being used here: " );
|
logger.info( "The covariates being used here: " );
|
||||||
logger.info( requestedCovariates );
|
logger.info( requestedCovariates );
|
||||||
|
dataManager = new RecalDataManager( estimatedCapacity );
|
||||||
|
|
||||||
}
|
}
|
||||||
addCSVData(line);
|
addCSVData(line);
|
||||||
}
|
}
|
||||||
|
|
@ -110,12 +139,12 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
} catch ( NumberFormatException e ) {
|
} catch ( NumberFormatException e ) {
|
||||||
throw new RuntimeException("Error parsing recalibration data at line " + lineNumber, e);
|
throw new RuntimeException("Error parsing recalibration data at line " + lineNumber, e);
|
||||||
}
|
}
|
||||||
out.println( "...done!" );
|
logger.info( "...done!" );
|
||||||
|
|
||||||
if( MODE == RecalibrationMode.SEQUENTIAL ) {
|
if( MODE == RecalibrationMode.SEQUENTIAL ) {
|
||||||
out.print( "Creating collapsed tables for use in sequential calculation..." );
|
logger.info( "Creating collapsed tables for use in sequential calculation..." );
|
||||||
dataManager.createCollapsedTables( requestedCovariates.size() );
|
dataManager.createCollapsedTables( requestedCovariates.size() );
|
||||||
out.println( "...done!" );
|
logger.info( "...done!" );
|
||||||
}
|
}
|
||||||
|
|
||||||
//System.out.println(dataManager.getCollapsedTable(1));
|
//System.out.println(dataManager.getCollapsedTable(1));
|
||||||
|
|
@ -123,7 +152,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
|
|
||||||
private void addCSVData(String line) {
|
private void addCSVData(String line) {
|
||||||
String[] vals = line.split(",");
|
String[] vals = line.split(",");
|
||||||
ArrayList<Comparable<?>> key = new ArrayList<Comparable<?>>();
|
ArrayList<Comparable> key = new ArrayList<Comparable>();
|
||||||
Covariate cov; // preallocated for use in for loop below
|
Covariate cov; // preallocated for use in for loop below
|
||||||
int iii;
|
int iii;
|
||||||
for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
|
for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
|
||||||
|
|
@ -149,53 +178,44 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
byte[] recalQuals = originalQuals.clone();
|
byte[] recalQuals = originalQuals.clone();
|
||||||
|
|
||||||
// For each base in the read
|
// For each base in the read
|
||||||
Comparable<?> keyElement; // preallocate for use in for loops below
|
|
||||||
for( int iii = 1; iii < read.getReadLength() - 1; iii++ ) { // skip first and last base because there is no dinuc
|
for( int iii = 1; iii < read.getReadLength() - 1; iii++ ) { // skip first and last base because there is no dinuc
|
||||||
ArrayList<Comparable<?>> key = new ArrayList<Comparable<?>>();
|
List<Comparable> key = new ArrayList<Comparable>();
|
||||||
boolean badKey = false;
|
|
||||||
for( Covariate covariate : requestedCovariates ) {
|
for( Covariate covariate : requestedCovariates ) {
|
||||||
keyElement = covariate.getValue( read, iii, refBases );
|
key.add( covariate.getValue( read, iii, refBases ) );
|
||||||
if ( keyElement != null ) {
|
|
||||||
key.add( keyElement );
|
|
||||||
} else {
|
|
||||||
badKey = true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if( !badKey ) {
|
if( MODE == RecalibrationMode.COMBINATORIAL ) {
|
||||||
if( MODE == RecalibrationMode.COMBINATORIAL ) {
|
RecalDatum datum = dataManager.data.get( key );
|
||||||
RecalDatum datum = dataManager.data.get( key );
|
if( datum != null ) { // if we have data for this combination of covariates then recalibrate the quality score otherwise do nothing
|
||||||
if( datum != null ) { // if we have data for this combination of covariates then recalibrate the quality score otherwise do nothing
|
recalQuals[iii] = datum.empiricalQualByte( SMOOTHING );
|
||||||
recalQuals[iii] = datum.empiricalQualByte( SMOOTHING );
|
|
||||||
}
|
|
||||||
} else if( MODE == RecalibrationMode.SEQUENTIAL ) {
|
|
||||||
recalQuals[iii] = performSequentialQualityCalculation( key );
|
|
||||||
} else {
|
|
||||||
throw new StingException( "Specified RecalibrationMode is not supported: " + MODE );
|
|
||||||
}
|
}
|
||||||
|
} else if( MODE == RecalibrationMode.SEQUENTIAL ) {
|
||||||
|
recalQuals[iii] = performSequentialQualityCalculation( key );
|
||||||
|
} else {
|
||||||
|
throw new StingException( "Specified RecalibrationMode is not supported: " + MODE );
|
||||||
|
}
|
||||||
|
|
||||||
// Do some error checking on the new quality score
|
// Do some error checking on the new quality score
|
||||||
if ( recalQuals[iii] <= 0 || recalQuals[iii] > QualityUtils.MAX_REASONABLE_Q_SCORE ) {
|
if ( recalQuals[iii] <= 0 || recalQuals[iii] > QualityUtils.MAX_REASONABLE_Q_SCORE ) {
|
||||||
throw new StingException( "Assigning bad quality score " + key + " => " + recalQuals[iii] );
|
throw new StingException( "Assigning bad quality score " + key + " => " + recalQuals[iii] );
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
preserveQScores( originalQuals, recalQuals ); // overwrite the work done if original quality score is too low
|
preserveQScores( originalQuals, recalQuals ); // overwrite the work done if original quality score is too low
|
||||||
read.setBaseQualities(recalQuals); // overwrite old qualities with new recalibrated qualities
|
read.setBaseQualities(recalQuals); // overwrite old qualities with new recalibrated qualities
|
||||||
if ( read.getAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG) == null ) { // save the old qualities if there is room in the read
|
if ( read.getAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG) == null ) { // save the old qualities if the tag isn't already taken in the read
|
||||||
read.setAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG, QualityUtils.phredToFastq(originalQuals));
|
read.setAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG, QualityUtils.phredToFastq(originalQuals));
|
||||||
}
|
}
|
||||||
|
|
||||||
return read;
|
return read;
|
||||||
}
|
}
|
||||||
|
|
||||||
private byte performSequentialQualityCalculation( ArrayList<? extends Comparable<?>> key ) {
|
private byte performSequentialQualityCalculation( List<? extends Comparable> key ) {
|
||||||
|
|
||||||
byte qualFromRead = Byte.parseByte(key.get(1).toString());
|
byte qualFromRead = Byte.parseByte(key.get(1).toString());
|
||||||
ArrayList<Comparable<?>> newKey;
|
ArrayList<Comparable> newKey;
|
||||||
|
|
||||||
newKey = new ArrayList<Comparable<?>>();
|
newKey = new ArrayList<Comparable>();
|
||||||
newKey.add( key.get(0) ); // read group
|
newKey.add( key.get(0) ); // read group
|
||||||
RecalDatum globalDeltaQDatum = dataManager.getCollapsedTable(0).get( newKey );
|
RecalDatum globalDeltaQDatum = dataManager.getCollapsedTable(0).get( newKey );
|
||||||
double globalDeltaQ = 0.0;
|
double globalDeltaQ = 0.0;
|
||||||
|
|
@ -206,7 +226,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
newKey = new ArrayList<Comparable<?>>();
|
newKey = new ArrayList<Comparable>();
|
||||||
newKey.add( key.get(0) ); // read group
|
newKey.add( key.get(0) ); // read group
|
||||||
newKey.add( key.get(1) ); // quality score
|
newKey.add( key.get(1) ); // quality score
|
||||||
RecalDatum deltaQReportedDatum = dataManager.getCollapsedTable(1).get( newKey );
|
RecalDatum deltaQReportedDatum = dataManager.getCollapsedTable(1).get( newKey );
|
||||||
|
|
@ -219,7 +239,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
double deltaQCovariates = 0.0;
|
double deltaQCovariates = 0.0;
|
||||||
RecalDatum deltaQCovariateDatum;
|
RecalDatum deltaQCovariateDatum;
|
||||||
for( int iii = 2; iii < key.size(); iii++ ) {
|
for( int iii = 2; iii < key.size(); iii++ ) {
|
||||||
newKey = new ArrayList<Comparable<?>>();
|
newKey = new ArrayList<Comparable>();
|
||||||
newKey.add( key.get(0) ); // read group
|
newKey.add( key.get(0) ); // read group
|
||||||
newKey.add( key.get(1) ); // quality score
|
newKey.add( key.get(1) ); // quality score
|
||||||
newKey.add( key.get(iii) ); // given covariate
|
newKey.add( key.get(iii) ); // given covariate
|
||||||
|
|
@ -236,6 +256,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
throw new StingException( "Illegal base quality score calculated: " + key +
|
throw new StingException( "Illegal base quality score calculated: " + key +
|
||||||
String.format( " => %d + %.2f + %.2f + %.2f = %d", qualFromRead, globalDeltaQ, deltaQReported, deltaQCovariates, newQualityByte ) );
|
String.format( " => %d + %.2f + %.2f + %.2f = %d", qualFromRead, globalDeltaQ, deltaQReported, deltaQCovariates, newQualityByte ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//System.out.println("returning: " + newQualityByte);
|
||||||
return newQualityByte;
|
return newQualityByte;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue