Added some documentation to the code, mostly especially to CovariateCounterWalker but various comments added throughout. Also changed the HashMap data structure to accept an estimated initial capacity. This had a very modest improvement to the speed.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2001 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
74751a8ed3
commit
740a5484c4
|
|
@ -2,12 +2,41 @@ package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
|||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Oct 30, 2009
|
||||
*
|
||||
* The Covariate interface. A Covariate is a feature used in the recalibration that can be picked out of the read and corresponding reference bases
|
||||
*/
|
||||
|
||||
public interface Covariate {
|
||||
public Comparable<?> getValue(SAMRecord read, int offset, char[] refBases); // used to pick out the value from the read and etc
|
||||
public Comparable<?> getValue(String str); // used to get value from input file
|
||||
public Comparable getValue(SAMRecord read, int offset, char[] refBases); // used to pick out the value from the read and reference bases
|
||||
public Comparable getValue(String str); // used to get value from input file
|
||||
public int estimatedNumberOfBins(); // used to estimate the amount space required in the hashmap
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,10 +16,47 @@ import java.util.*;
|
|||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Nov 3, 2009
|
||||
*
|
||||
* This walker is designed to work as the first pass in a two-pass processing step.
|
||||
* It does a by-locus traversal operating only at sites that are not in dbSNP.
|
||||
* We assume that all mismatches we see are therefore errors.
|
||||
* This walker generates tables based on various user-specified covariates (such as read group, reported quality score, cycle, and dinuc)
|
||||
* Since there is a large amount of data one can then calculate an empirical probability of error
|
||||
* given the particular covariates seen at this site, where p(error) = num mismatches / num observations
|
||||
* The output file is a CSV list of (several covariate values, num observations, num mismatches, empirical quality score)
|
||||
* The first lines of the output file give the name of the covariate classes that were used for this calculation.
|
||||
*
|
||||
* Note: ReadGroupCovariate and QualityScoreCovariate are required covariates and are automatically added first.
|
||||
* Note: This walker is designed to be used in conjunction with TableRecalibrationWalker.
|
||||
*/
|
||||
|
||||
@WalkerName("CountCovariatesRefactored")
|
||||
|
|
@ -36,15 +73,18 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
@Argument(fullName="recal_file", shortName="rf", required=false, doc="Filename for the outputted covariates table recalibration file")
|
||||
public String RECAL_FILE = "output.recal_data.csv";
|
||||
|
||||
protected static RecalDataManager dataManager;
|
||||
protected static ArrayList<Covariate> requestedCovariates;
|
||||
protected static RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
|
||||
protected static ArrayList<Covariate> requestedCovariates; // A list to hold the covariate objects that were requested
|
||||
|
||||
/**
|
||||
* Parse the -cov arguments and create a list of covariates to be used here
|
||||
* Based on the covariates' estimates for initial capacity allocate the data hashmap
|
||||
*/
|
||||
public void initialize() {
|
||||
|
||||
dataManager = new RecalDataManager();
|
||||
|
||||
// Get a list of all available covariates
|
||||
List<Class<? extends Covariate>> classes = PackageUtils.getClassesImplementingInterface(Covariate.class);
|
||||
int estimatedCapacity = 1; // start at one because capacity is multiplicative for each covariate
|
||||
|
||||
// Print and exit if that's what was requested
|
||||
if ( LIST_ONLY ) {
|
||||
|
|
@ -57,7 +97,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
System.exit( 0 ); // early exit here because user requested it
|
||||
}
|
||||
|
||||
// Initialize the requested covariates
|
||||
// Initialize the requested covariates by parsing the -cov argument
|
||||
requestedCovariates = new ArrayList<Covariate>();
|
||||
requestedCovariates.add( new ReadGroupCovariate() ); // Read Group Covariate is a required covariate for the recalibration calculation
|
||||
requestedCovariates.add( new QualityScoreCovariate() ); // Quality Score Covariate is a required covariate for the recalibration calculation
|
||||
|
|
@ -67,11 +107,13 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
boolean foundClass = false;
|
||||
for( Class<?> covClass : classes ) {
|
||||
|
||||
if( requestedCovariateString.equalsIgnoreCase( covClass.getSimpleName() ) ) {
|
||||
if( requestedCovariateString.equalsIgnoreCase( covClass.getSimpleName() ) ) { // -cov argument matches the class name for an implementing class
|
||||
foundClass = true;
|
||||
try {
|
||||
Covariate covariate = (Covariate)covClass.newInstance();
|
||||
requestedCovariates.add( covariate );
|
||||
estimatedCapacity *= covariate.estimatedNumberOfBins(); // update the estimated initial capacity
|
||||
|
||||
if (covariate instanceof ReadGroupCovariate || covariate instanceof QualityScoreCovariate) {
|
||||
throw new StingException( "ReadGroupCovariate and QualityScoreCovariate are required covariates and are therefore added for you. Please remove them from the -cov list" );
|
||||
}
|
||||
|
|
@ -92,27 +134,48 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
|
||||
logger.info( "The covariates being used here: " );
|
||||
logger.info( requestedCovariates );
|
||||
|
||||
dataManager = new RecalDataManager( estimatedCapacity );
|
||||
}
|
||||
|
||||
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// map
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* For each read at this locus get the various covariate values and increment that location in the map based on
|
||||
* whether or not the base matches the reference at this particular location
|
||||
* @param tracker the reference metadata tracker
|
||||
* @param ref the reference context
|
||||
* @param context the alignment context
|
||||
* @return returns 1, but this value isn't used in the reduce step
|
||||
*/
|
||||
public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
||||
|
||||
rodDbSNP dbsnp = rodDbSNP.getFirstRealSNP(tracker.getTrackData("dbsnp", null));
|
||||
|
||||
// Only use data from non-dbsnp sites
|
||||
// Assume every mismatch at a non-dbsnp site is indicitive of poor quality
|
||||
if( dbsnp == null ) {
|
||||
List<SAMRecord> reads = context.getReads();
|
||||
List<Integer> offsets = context.getOffsets();
|
||||
SAMRecord read; // preallocate for use in for loop below
|
||||
int offset; // preallocate for use in for loop below
|
||||
SAMRecord read;
|
||||
int offset;
|
||||
|
||||
// For each read at this locus
|
||||
for( int iii = 0; iii < reads.size(); iii++ ) {
|
||||
read = reads.get(iii);
|
||||
offset = offsets.get(iii);
|
||||
|
||||
// Only use data from reads with mapping quality above given quality value and base quality greater than zero
|
||||
// Only use data from reads with mapping quality above specified quality value and base quality greater than zero
|
||||
byte[] quals = read.getBaseQualities();
|
||||
if( read.getMappingQuality() >= MIN_MAPPING_QUALITY && quals[offset] > 0)
|
||||
{
|
||||
if( offset > 0 && offset < (read.getReadLength() - 1) ) { // skip first and last bases because they don't have a dinuc count
|
||||
{
|
||||
// Skip first and last bases because they don't have a dinuc count
|
||||
if( offset > 0 && offset < (read.getReadLength() - 1) ) {
|
||||
updateDataFromRead(read, offset, ref);
|
||||
}
|
||||
}
|
||||
|
|
@ -124,43 +187,69 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Major workhorse routine for this walker.
|
||||
* Loop through the list of requested covariates and pick out the value from the read, offset, and reference
|
||||
* Using the list of covariate values as a key, pick out the RecalDatum and increment,
|
||||
* adding one to the number of observations and potentially one to the number of mismatches
|
||||
* @param read the read
|
||||
* @param offset the offset in the read for this locus
|
||||
* @param ref the reference context
|
||||
*/
|
||||
private void updateDataFromRead(SAMRecord read, int offset, ReferenceContext ref) {
|
||||
|
||||
List<Comparable<?>> key = new ArrayList<Comparable<?>>();
|
||||
Comparable<?> keyElement; // preallocate for use in for loop below
|
||||
List<Comparable> key = new ArrayList<Comparable>();
|
||||
Comparable keyElement;
|
||||
boolean badKey = false;
|
||||
|
||||
// Loop through the list of requested covariates and pick out the value from the read, offset, and reference
|
||||
for( Covariate covariate : requestedCovariates ) {
|
||||
keyElement = covariate.getValue( read, offset, ref.getBases() );
|
||||
if( keyElement != null ) {
|
||||
key.add( keyElement );
|
||||
} else {
|
||||
badKey = true;
|
||||
badKey = true; // covariate returned bad value, for example dinuc returns null because base = 'N'
|
||||
}
|
||||
}
|
||||
|
||||
// Using the list of covariate values as a key, pick out the RecalDatum
|
||||
RecalDatum datum = null;
|
||||
if( !badKey ) {
|
||||
datum = dataManager.data.get( key );
|
||||
if( datum == null ) { // key doesn't exist yet in the map so make a new bucket and add it
|
||||
datum = new RecalDatum();
|
||||
datum = new RecalDatum(); // initialized with zeros, will be incremented at end of method
|
||||
dataManager.data.put( key, datum );
|
||||
}
|
||||
}
|
||||
|
||||
// Need the bases to determine whether or not we have a mismatch
|
||||
byte[] bases = read.getReadBases();
|
||||
|
||||
char base = (char)bases[offset];
|
||||
char refBase = ref.getBase();
|
||||
// Get the complement base strand if we are a negative direction read
|
||||
if ( read.getReadNegativeStrandFlag() ) {
|
||||
refBase = BaseUtils.simpleComplement( refBase );
|
||||
base = BaseUtils.simpleComplement( base );
|
||||
}
|
||||
|
||||
if( datum != null ) {
|
||||
// Add one to the number of observations and potentially one to the number of mismatches
|
||||
datum.increment( base, refBase );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// reduce
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Initialize the reudce step by creating a PrintStream from the filename specified as an argument to the walker.
|
||||
* @return returns a PrintStream created from the -rf filename
|
||||
*/
|
||||
public PrintStream reduceInit() {
|
||||
try {
|
||||
return new PrintStream( RECAL_FILE );
|
||||
|
|
@ -169,10 +258,20 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The Reduce method doesn't do anything for this walker.
|
||||
* @param value result of the map.
|
||||
* @param recalTableStream the PrintStream
|
||||
* @return returns the PrintStream used to output the CSV data
|
||||
*/
|
||||
public PrintStream reduce( Integer value, PrintStream recalTableStream ) {
|
||||
return recalTableStream; // nothing to do here
|
||||
}
|
||||
|
||||
/**
|
||||
* Write out the full data hashmap to disk in CSV format
|
||||
* @param recalTableStream the PrintStream to write out to
|
||||
*/
|
||||
public void onTraversalDone( PrintStream recalTableStream ) {
|
||||
out.print( "Writing raw recalibration data..." );
|
||||
for( Covariate cov : requestedCovariates ) {
|
||||
|
|
@ -184,16 +283,19 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
recalTableStream.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* For each entry (key-value pair) in the data hashmap output the Covariate's values as well as the RecalDatum's data in CSV format
|
||||
* @param recalTableStream the PrintStream to write out to
|
||||
*/
|
||||
private void outputToCSV( PrintStream recalTableStream ) {
|
||||
for( Map.Entry<List<? extends Comparable<?>>, RecalDatum> entry : dataManager.data.entrySet() ) {
|
||||
//for( int iii = 0; iii < entry.getKey().size(); iii++ ) {
|
||||
//index = Integer.parseInt( (entry.getKey().get( iii )).toString());
|
||||
//recalTableStream.print( requestedCovariates.get( iii ).getValue( index ) + "," );
|
||||
// recalTableStream.print( entry.getKey().get(iii) )
|
||||
//}
|
||||
for( Comparable<?> comp : entry.getKey() ) {
|
||||
// For each entry in the data hashmap
|
||||
for( Map.Entry<List<? extends Comparable>, RecalDatum> entry : dataManager.data.entrySet() ) {
|
||||
// For each Covariate in the key
|
||||
for( Comparable comp : entry.getKey() ) {
|
||||
// Output the Covariate's value
|
||||
recalTableStream.print( comp + "," );
|
||||
}
|
||||
// Output the RecalDatum entry
|
||||
recalTableStream.println( entry.getValue().outputToCSV() );
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,16 +2,42 @@ package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
|||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Oct 30, 2009
|
||||
*/
|
||||
|
||||
public class CycleCovariate implements Covariate {
|
||||
|
||||
public String platform;
|
||||
|
||||
public CycleCovariate() { // empty constructor is required by CovariateCounterWalker
|
||||
public CycleCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||
platform = null;
|
||||
}
|
||||
|
||||
|
|
@ -19,8 +45,8 @@ public class CycleCovariate implements Covariate {
|
|||
platform = _platform;
|
||||
}
|
||||
|
||||
public Comparable<?> getValue(SAMRecord read, int offset, char[] refBases) {
|
||||
//BUGBUG: assumes Solexia platform
|
||||
public Comparable getValue(SAMRecord read, int offset, char[] refBases) {
|
||||
//BUGBUG: assumes Solexa platform
|
||||
Integer cycle = offset;
|
||||
if( read.getReadNegativeStrandFlag() ) {
|
||||
cycle = read.getReadLength() - (offset + 1);
|
||||
|
|
@ -28,10 +54,14 @@ public class CycleCovariate implements Covariate {
|
|||
return cycle;
|
||||
}
|
||||
|
||||
public Comparable<?> getValue(String str) {
|
||||
public Comparable getValue(String str) {
|
||||
return Integer.parseInt( str );
|
||||
}
|
||||
|
||||
public int estimatedNumberOfBins() {
|
||||
return 100;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return "Cycle";
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,16 +5,42 @@ import org.broadinstitute.sting.utils.BaseUtils;
|
|||
|
||||
import java.util.*;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Nov 3, 2009
|
||||
*/
|
||||
|
||||
public class DinucCovariate implements Covariate {
|
||||
|
||||
public static ArrayList<String> BASES;
|
||||
|
||||
public DinucCovariate() { // empty constructor is required by CovariateCounterWalker
|
||||
public DinucCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||
BASES = new ArrayList<String>();
|
||||
BASES.add("A");
|
||||
BASES.add("G");
|
||||
|
|
@ -26,7 +52,7 @@ public class DinucCovariate implements Covariate {
|
|||
BASES.add("t");
|
||||
}
|
||||
|
||||
public Comparable<?> getValue(SAMRecord read, int offset, char[] refBases) {
|
||||
public Comparable getValue(SAMRecord read, int offset, char[] refBases) {
|
||||
byte[] bases = read.getReadBases();
|
||||
char base = (char)bases[offset];
|
||||
char prevBase = (char)bases[offset - 1];
|
||||
|
|
@ -36,16 +62,20 @@ public class DinucCovariate implements Covariate {
|
|||
}
|
||||
// Check if bad base, probably an 'N'
|
||||
if( !BASES.contains( String.format( "%c", prevBase ) ) || !BASES.contains( String.format( "%c", base) ) ) {
|
||||
return null; // CovariateCounterWalker and TableRecalibrationWalker will recognize that null means skip this particular location in the read
|
||||
return null; // CovariateCounterWalker will recognize that null means skip this particular location in the read
|
||||
} else {
|
||||
return String.format("%c%c", prevBase, base);
|
||||
}
|
||||
}
|
||||
|
||||
public Comparable<?> getValue(String str) {
|
||||
public Comparable getValue(String str) {
|
||||
return str;
|
||||
}
|
||||
|
||||
public int estimatedNumberOfBins() {
|
||||
return 16;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return "Dinucleotide";
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,23 +2,53 @@ package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
|||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Nov 4, 2009
|
||||
*/
|
||||
|
||||
public class MappingQualityCovariate implements Covariate {
|
||||
|
||||
public MappingQualityCovariate() { // empty constructor is required by CovariateCounterWalker
|
||||
public MappingQualityCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||
}
|
||||
|
||||
public Comparable<?> getValue(SAMRecord read, int offset, char[] refBases) {
|
||||
return (Integer)(read.getMappingQuality());
|
||||
public Comparable getValue(SAMRecord read, int offset, char[] refBases) {
|
||||
return (Integer)(read.getMappingQuality()); // cast to Object Integer is required so that return values from the two getValue methods hash to the same code
|
||||
}
|
||||
|
||||
public Comparable<?> getValue(String str) {
|
||||
public Comparable getValue(String str) {
|
||||
return Integer.parseInt( str );
|
||||
}
|
||||
|
||||
public int estimatedNumberOfBins() {
|
||||
return 100;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return "Mapping Quality Score";
|
||||
|
|
|
|||
|
|
@ -3,17 +3,43 @@ package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Nov 4, 2009
|
||||
*/
|
||||
|
||||
public class MinimumNQSCovariate implements Covariate {
|
||||
|
||||
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ";
|
||||
protected boolean USE_ORIGINAL_QUALS;
|
||||
|
||||
public MinimumNQSCovariate() { // empty constructor is required by CovariateCounterWalker
|
||||
public MinimumNQSCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||
USE_ORIGINAL_QUALS = false;
|
||||
}
|
||||
|
||||
|
|
@ -21,7 +47,7 @@ public class MinimumNQSCovariate implements Covariate {
|
|||
USE_ORIGINAL_QUALS = originalQuals;
|
||||
}
|
||||
|
||||
public Comparable<?> getValue(SAMRecord read, int offset, char[] refBases) {
|
||||
public Comparable getValue(SAMRecord read, int offset, char[] refBases) {
|
||||
byte[] quals = read.getBaseQualities();
|
||||
if ( USE_ORIGINAL_QUALS && read.getAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG) != null ) {
|
||||
Object obj = read.getAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG);
|
||||
|
|
@ -41,10 +67,14 @@ public class MinimumNQSCovariate implements Covariate {
|
|||
return minQual;
|
||||
}
|
||||
|
||||
public Comparable<?> getValue(String str) {
|
||||
public Comparable getValue(String str) {
|
||||
return Integer.parseInt( str );
|
||||
}
|
||||
|
||||
public int estimatedNumberOfBins() {
|
||||
return 40;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return "Minimum Neighborhood Quality Score";
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,18 +2,93 @@ package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
|||
|
||||
import java.util.*;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Oct 30, 2009
|
||||
*/
|
||||
public class NHashMap<T> extends HashMap<List<? extends Comparable<?>>, T> {
|
||||
|
||||
public class NHashMap<T> extends HashMap<List<? extends Comparable>, T> {
|
||||
|
||||
private static final long serialVersionUID = 1L; //BUGBUG: what should I do here?
|
||||
private ArrayList<ArrayList<Comparable>> keyLists;
|
||||
|
||||
|
||||
public NHashMap() {
|
||||
super();
|
||||
keyLists = new ArrayList<ArrayList<Comparable>>();
|
||||
}
|
||||
|
||||
public static <T extends Comparable<?>> List<T> makeList(T... args) {
|
||||
public NHashMap( int initialCapacity, float loadingFactor ) {
|
||||
super( initialCapacity, loadingFactor );
|
||||
keyLists = new ArrayList<ArrayList<Comparable>>();
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
// This method is here only to help facilitate direct comparison of old and refactored recalibrator.
|
||||
// The old recalibrator prints out it's mappings in a sorted order but the refactored recalibrator doesn't need to.
|
||||
// Comment out this overriding method to improve performance
|
||||
public T put(List<? extends Comparable> key, T value) {
|
||||
|
||||
ArrayList<Comparable> thisList;
|
||||
for( int iii = 0; iii < key.size(); iii++ ) {
|
||||
thisList = keyLists.get( iii );
|
||||
if( thisList == null ) {
|
||||
thisList = new ArrayList<Comparable>();
|
||||
}
|
||||
if( !thisList.contains( key.get( iii ) ) ) {
|
||||
thisList.add( key.get(iii ) );
|
||||
}
|
||||
}
|
||||
return super.put( key, value );
|
||||
}
|
||||
|
||||
// This method is here only to help facilitate direct comparison of old and refactored recalibrator.
|
||||
// The old recalibrator prints out it's mappings in a sorted order but the refactored recalibrator doesn't need to.
|
||||
// Comment out this overriding method to improve performance
|
||||
public Set<Map.Entry<List<? extends Comparable>, T>> entrySet() {
|
||||
|
||||
for( ArrayList<Comparable> list : keyLists ) {
|
||||
Collections.sort(list);
|
||||
}
|
||||
|
||||
for( ArrayList<Comparable> list : keyLists ) {
|
||||
for( Comparable comp : list ) {
|
||||
// BUGBUG: unfinished
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
public static <T extends Comparable> List<T> makeList(T... args) {
|
||||
List<T> list = new ArrayList<T>();
|
||||
for (T arg : args)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -3,17 +3,43 @@ package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Nov 3, 2009
|
||||
*/
|
||||
|
||||
public class QualityScoreCovariate implements Covariate {
|
||||
|
||||
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ";
|
||||
protected boolean USE_ORIGINAL_QUALS;
|
||||
|
||||
public QualityScoreCovariate() { // empty constructor is required by CovariateCounterWalker
|
||||
public QualityScoreCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||
USE_ORIGINAL_QUALS = false;
|
||||
}
|
||||
|
||||
|
|
@ -21,7 +47,7 @@ public class QualityScoreCovariate implements Covariate {
|
|||
USE_ORIGINAL_QUALS = originalQuals;
|
||||
}
|
||||
|
||||
public Comparable<?> getValue(SAMRecord read, int offset, char[] refBases) {
|
||||
public Comparable getValue(SAMRecord read, int offset, char[] refBases) {
|
||||
byte[] quals = read.getBaseQualities();
|
||||
if ( USE_ORIGINAL_QUALS && read.getAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG) != null ) {
|
||||
Object obj = read.getAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG);
|
||||
|
|
@ -32,12 +58,16 @@ public class QualityScoreCovariate implements Covariate {
|
|||
}
|
||||
}
|
||||
|
||||
return ((Integer)((int)quals[offset]));
|
||||
return ((Integer)((int)quals[offset])); // cast to Object Integer is required so that return values from the two getValue methods hash to the same code
|
||||
}
|
||||
|
||||
public Comparable<?> getValue(String str) {
|
||||
public Comparable getValue(String str) {
|
||||
return Integer.parseInt( str );
|
||||
}
|
||||
|
||||
public int estimatedNumberOfBins() {
|
||||
return 40;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return "Reported Quality Score";
|
||||
|
|
|
|||
|
|
@ -2,24 +2,54 @@ package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
|||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Oct 30, 2009
|
||||
*/
|
||||
|
||||
public class ReadGroupCovariate implements Covariate{
|
||||
|
||||
public ReadGroupCovariate() { // empty constructor is required by CovariateCounterWalker
|
||||
public ReadGroupCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||
}
|
||||
|
||||
public Comparable<?> getValue(SAMRecord read, int offset, char[] refBases) {
|
||||
public Comparable getValue(SAMRecord read, int offset, char[] refBases) {
|
||||
return read.getReadGroup().getReadGroupId();
|
||||
}
|
||||
|
||||
public Comparable<?> getValue(String str) {
|
||||
public Comparable getValue(String str) {
|
||||
return str;
|
||||
}
|
||||
|
||||
public int estimatedNumberOfBins() {
|
||||
return 300;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return "Read Group";
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,6 +5,31 @@ import org.broadinstitute.sting.utils.QualityUtils;
|
|||
|
||||
import java.util.*;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
|
|
@ -19,8 +44,9 @@ public class RecalDataManager {
|
|||
public boolean collapsedTablesCreated;
|
||||
public NHashMap<Double> dataSumExpectedErrors;
|
||||
|
||||
RecalDataManager() {
|
||||
data = new NHashMap<RecalDatum>();
|
||||
RecalDataManager( int estimatedCapacity ) {
|
||||
data = new NHashMap<RecalDatum>( estimatedCapacity, 0.95f ); // second arg is the 'loading factor',
|
||||
// a number to monkey around with when optimizing performace of the HashMap
|
||||
collapsedTablesCreated = false;
|
||||
}
|
||||
|
||||
|
|
@ -37,17 +63,17 @@ public class RecalDataManager {
|
|||
// preallocate for use in for loops below
|
||||
RecalDatum thisDatum;
|
||||
RecalDatum collapsedDatum;
|
||||
List<? extends Comparable<?>> key;
|
||||
ArrayList<Comparable<?>> newKey;
|
||||
List<? extends Comparable> key;
|
||||
ArrayList<Comparable> newKey;
|
||||
Double sumExpectedErrors;
|
||||
|
||||
// for every data point in the map
|
||||
for( Map.Entry<List<? extends Comparable<?>>,RecalDatum> entry : data.entrySet() ) {
|
||||
for( Map.Entry<List<? extends Comparable>,RecalDatum> entry : data.entrySet() ) {
|
||||
thisDatum = entry.getValue();
|
||||
key = entry.getKey();
|
||||
|
||||
// create dataCollapsedReadGroup, the table where everything except read group has been collapsed
|
||||
newKey = new ArrayList<Comparable<?>>();
|
||||
newKey = new ArrayList<Comparable>();
|
||||
newKey.add( key.get(0) ); // make a new key with just the read group
|
||||
collapsedDatum = dataCollapsedReadGroup.get( newKey );
|
||||
if( collapsedDatum == null ) {
|
||||
|
|
@ -57,7 +83,7 @@ public class RecalDataManager {
|
|||
collapsedDatum.increment( thisDatum );
|
||||
}
|
||||
|
||||
newKey = new ArrayList<Comparable<?>>();
|
||||
newKey = new ArrayList<Comparable>();
|
||||
newKey.add( key.get(0) ); // make a new key with just the read group
|
||||
sumExpectedErrors = dataSumExpectedErrors.get( newKey );
|
||||
if( sumExpectedErrors == null ) {
|
||||
|
|
@ -69,7 +95,7 @@ public class RecalDataManager {
|
|||
dataSumExpectedErrors.put( newKey, sumExpectedErrors );
|
||||
}
|
||||
|
||||
newKey = new ArrayList<Comparable<?>>();
|
||||
newKey = new ArrayList<Comparable>();
|
||||
// create dataCollapsedQuality, the table where everything except read group and quality score has been collapsed
|
||||
newKey.add( key.get(0) ); // make a new key with the read group ...
|
||||
newKey.add( key.get(1) ); // and quality score
|
||||
|
|
@ -83,7 +109,7 @@ public class RecalDataManager {
|
|||
|
||||
// create dataCollapsedByCovariate's, the tables where everything except read group, quality score, and given covariate has been collapsed
|
||||
for( int iii = 0; iii < numCovariates - 2; iii++ ) { // readGroup and QualityScore aren't counted
|
||||
newKey = new ArrayList<Comparable<?>>();
|
||||
newKey = new ArrayList<Comparable>();
|
||||
newKey.add( key.get(0) ); // make a new key with the read group ...
|
||||
newKey.add( key.get(1) ); // and quality score ...
|
||||
newKey.add( key.get(iii + 2) ); // and the given covariate
|
||||
|
|
|
|||
|
|
@ -5,6 +5,31 @@ import org.broadinstitute.sting.utils.QualityUtils;
|
|||
|
||||
import java.util.*;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
|
|
|
|||
|
|
@ -13,6 +13,31 @@ import java.util.regex.Pattern;
|
|||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
|
|
@ -56,14 +81,14 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
|
||||
int lineNumber = 0;
|
||||
boolean foundAllCovariates = false;
|
||||
dataManager = new RecalDataManager();
|
||||
int estimatedCapacity = 1;
|
||||
|
||||
// Read in the covariates that were used from the input file
|
||||
requestedCovariates = new ArrayList<Covariate>();
|
||||
|
||||
|
||||
// Read in the data from the csv file and populate the map
|
||||
out.print( "Reading in the data from input file..." );
|
||||
logger.info( "Reading in the data from input file..." );
|
||||
|
||||
try {
|
||||
for ( String line : new xReadLines(new File( RECAL_FILE )) ) {
|
||||
|
|
@ -80,6 +105,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
try {
|
||||
Covariate covariate = (Covariate)covClass.newInstance();
|
||||
requestedCovariates.add( covariate );
|
||||
estimatedCapacity *= covariate.estimatedNumberOfBins();
|
||||
|
||||
} catch ( InstantiationException e ) {
|
||||
throw new StingException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) );
|
||||
} catch ( IllegalAccessException e ) {
|
||||
|
|
@ -100,6 +127,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
foundAllCovariates = true;
|
||||
logger.info( "The covariates being used here: " );
|
||||
logger.info( requestedCovariates );
|
||||
dataManager = new RecalDataManager( estimatedCapacity );
|
||||
|
||||
}
|
||||
addCSVData(line);
|
||||
}
|
||||
|
|
@ -110,12 +139,12 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
} catch ( NumberFormatException e ) {
|
||||
throw new RuntimeException("Error parsing recalibration data at line " + lineNumber, e);
|
||||
}
|
||||
out.println( "...done!" );
|
||||
logger.info( "...done!" );
|
||||
|
||||
if( MODE == RecalibrationMode.SEQUENTIAL ) {
|
||||
out.print( "Creating collapsed tables for use in sequential calculation..." );
|
||||
logger.info( "Creating collapsed tables for use in sequential calculation..." );
|
||||
dataManager.createCollapsedTables( requestedCovariates.size() );
|
||||
out.println( "...done!" );
|
||||
logger.info( "...done!" );
|
||||
}
|
||||
|
||||
//System.out.println(dataManager.getCollapsedTable(1));
|
||||
|
|
@ -123,7 +152,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
|
||||
private void addCSVData(String line) {
|
||||
String[] vals = line.split(",");
|
||||
ArrayList<Comparable<?>> key = new ArrayList<Comparable<?>>();
|
||||
ArrayList<Comparable> key = new ArrayList<Comparable>();
|
||||
Covariate cov; // preallocated for use in for loop below
|
||||
int iii;
|
||||
for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
|
||||
|
|
@ -149,53 +178,44 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
byte[] recalQuals = originalQuals.clone();
|
||||
|
||||
// For each base in the read
|
||||
Comparable<?> keyElement; // preallocate for use in for loops below
|
||||
for( int iii = 1; iii < read.getReadLength() - 1; iii++ ) { // skip first and last base because there is no dinuc
|
||||
ArrayList<Comparable<?>> key = new ArrayList<Comparable<?>>();
|
||||
boolean badKey = false;
|
||||
List<Comparable> key = new ArrayList<Comparable>();
|
||||
for( Covariate covariate : requestedCovariates ) {
|
||||
keyElement = covariate.getValue( read, iii, refBases );
|
||||
if ( keyElement != null ) {
|
||||
key.add( keyElement );
|
||||
} else {
|
||||
badKey = true;
|
||||
}
|
||||
key.add( covariate.getValue( read, iii, refBases ) );
|
||||
}
|
||||
|
||||
if( !badKey ) {
|
||||
if( MODE == RecalibrationMode.COMBINATORIAL ) {
|
||||
RecalDatum datum = dataManager.data.get( key );
|
||||
if( datum != null ) { // if we have data for this combination of covariates then recalibrate the quality score otherwise do nothing
|
||||
recalQuals[iii] = datum.empiricalQualByte( SMOOTHING );
|
||||
}
|
||||
} else if( MODE == RecalibrationMode.SEQUENTIAL ) {
|
||||
recalQuals[iii] = performSequentialQualityCalculation( key );
|
||||
} else {
|
||||
throw new StingException( "Specified RecalibrationMode is not supported: " + MODE );
|
||||
if( MODE == RecalibrationMode.COMBINATORIAL ) {
|
||||
RecalDatum datum = dataManager.data.get( key );
|
||||
if( datum != null ) { // if we have data for this combination of covariates then recalibrate the quality score otherwise do nothing
|
||||
recalQuals[iii] = datum.empiricalQualByte( SMOOTHING );
|
||||
}
|
||||
} else if( MODE == RecalibrationMode.SEQUENTIAL ) {
|
||||
recalQuals[iii] = performSequentialQualityCalculation( key );
|
||||
} else {
|
||||
throw new StingException( "Specified RecalibrationMode is not supported: " + MODE );
|
||||
}
|
||||
|
||||
// Do some error checking on the new quality score
|
||||
if ( recalQuals[iii] <= 0 || recalQuals[iii] > QualityUtils.MAX_REASONABLE_Q_SCORE ) {
|
||||
throw new StingException( "Assigning bad quality score " + key + " => " + recalQuals[iii] );
|
||||
}
|
||||
// Do some error checking on the new quality score
|
||||
if ( recalQuals[iii] <= 0 || recalQuals[iii] > QualityUtils.MAX_REASONABLE_Q_SCORE ) {
|
||||
throw new StingException( "Assigning bad quality score " + key + " => " + recalQuals[iii] );
|
||||
}
|
||||
}
|
||||
|
||||
preserveQScores( originalQuals, recalQuals ); // overwrite the work done if original quality score is too low
|
||||
read.setBaseQualities(recalQuals); // overwrite old qualities with new recalibrated qualities
|
||||
if ( read.getAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG) == null ) { // save the old qualities if there is room in the read
|
||||
if ( read.getAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG) == null ) { // save the old qualities if the tag isn't already taken in the read
|
||||
read.setAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG, QualityUtils.phredToFastq(originalQuals));
|
||||
}
|
||||
|
||||
return read;
|
||||
}
|
||||
|
||||
private byte performSequentialQualityCalculation( ArrayList<? extends Comparable<?>> key ) {
|
||||
private byte performSequentialQualityCalculation( List<? extends Comparable> key ) {
|
||||
|
||||
byte qualFromRead = Byte.parseByte(key.get(1).toString());
|
||||
ArrayList<Comparable<?>> newKey;
|
||||
ArrayList<Comparable> newKey;
|
||||
|
||||
newKey = new ArrayList<Comparable<?>>();
|
||||
newKey = new ArrayList<Comparable>();
|
||||
newKey.add( key.get(0) ); // read group
|
||||
RecalDatum globalDeltaQDatum = dataManager.getCollapsedTable(0).get( newKey );
|
||||
double globalDeltaQ = 0.0;
|
||||
|
|
@ -206,7 +226,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
}
|
||||
|
||||
|
||||
newKey = new ArrayList<Comparable<?>>();
|
||||
newKey = new ArrayList<Comparable>();
|
||||
newKey.add( key.get(0) ); // read group
|
||||
newKey.add( key.get(1) ); // quality score
|
||||
RecalDatum deltaQReportedDatum = dataManager.getCollapsedTable(1).get( newKey );
|
||||
|
|
@ -219,7 +239,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
double deltaQCovariates = 0.0;
|
||||
RecalDatum deltaQCovariateDatum;
|
||||
for( int iii = 2; iii < key.size(); iii++ ) {
|
||||
newKey = new ArrayList<Comparable<?>>();
|
||||
newKey = new ArrayList<Comparable>();
|
||||
newKey.add( key.get(0) ); // read group
|
||||
newKey.add( key.get(1) ); // quality score
|
||||
newKey.add( key.get(iii) ); // given covariate
|
||||
|
|
@ -236,6 +256,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
throw new StingException( "Illegal base quality score calculated: " + key +
|
||||
String.format( " => %d + %.2f + %.2f + %.2f = %d", qualFromRead, globalDeltaQ, deltaQReported, deltaQCovariates, newQualityByte ) );
|
||||
}
|
||||
|
||||
//System.out.println("returning: " + newQualityByte);
|
||||
return newQualityByte;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue