Optimization - Added method to Covariates: void getValues( SAMRecord read, Comparable[] comparable ) which takes an array of size (at least) read.getReadLength() and fills it with covariate values for all positions in the given read. Made CovariateCounterWalker and TableRecalibrationWalker use this method instead of calling getValue(..) for each covariate and each offset.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2863 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
32d14d988e
commit
b236714c8a
|
|
@ -41,6 +41,9 @@ public interface Covariate {
|
||||||
public void initialize( RecalibrationArgumentCollection RAC ); // Initialize any member variables using the command-line arguments passed to the walkers
|
public void initialize( RecalibrationArgumentCollection RAC ); // Initialize any member variables using the command-line arguments passed to the walkers
|
||||||
public Comparable getValue( SAMRecord read, int offset ); // Used to pick out the covariate's value from attributes of the read
|
public Comparable getValue( SAMRecord read, int offset ); // Used to pick out the covariate's value from attributes of the read
|
||||||
public Comparable getValue( String str ); // Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
public Comparable getValue( String str ); // Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
||||||
|
public void getValues( SAMRecord read, Comparable[] comparable ); //Takes an array of size (at least) read.getReadLength() and fills it with covariate
|
||||||
|
//values for each position in the read. This method was created as an optimization over calling getValue( read, offset ) for each offset and allows
|
||||||
|
//read-specific calculations to be done just once rather than for each offset.
|
||||||
}
|
}
|
||||||
|
|
||||||
interface RequiredCovariate extends Covariate {
|
interface RequiredCovariate extends Covariate {
|
||||||
|
|
|
||||||
|
|
@ -1,23 +1,35 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.PrintStream;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||||
|
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.walkers.By;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||||
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.NestedHashMap;
|
||||||
|
import org.broadinstitute.sting.utils.PackageUtils;
|
||||||
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.ArgumentCollection;
|
import org.broadinstitute.sting.utils.cmdLine.ArgumentCollection;
|
||||||
import org.broadinstitute.sting.utils.*;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import java.io.PrintStream;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 The Broad Institute
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
|
@ -68,6 +80,13 @@ import net.sf.samtools.SAMRecord;
|
||||||
@Requires( {DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES} ) // This walker requires both -I input.bam and -R reference.fasta
|
@Requires( {DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES} ) // This walker requires both -I input.bam and -R reference.fasta
|
||||||
public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
||||||
|
|
||||||
|
/////////////////////////////
|
||||||
|
// Constants
|
||||||
|
/////////////////////////////
|
||||||
|
private static final String SKIP_RECORD_ATTRIBUTE = "SKIP"; //used to label GATKSAMRecords that should be skipped.
|
||||||
|
private static final String SEEN_ATTRIBUTE = "SEEN"; //used to label GATKSAMRecords as processed.
|
||||||
|
private static final String COVARS_ATTRIBUTE = "COVARS"; //used to store covariates array as a temporary attribute inside GATKSAMRecord.
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Shared Arguments
|
// Shared Arguments
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
|
|
@ -252,43 +271,57 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
||||||
if( !isSNP && ( ++numUnprocessed >= PROCESS_EVERY_NTH_LOCUS ) ) {
|
if( !isSNP && ( ++numUnprocessed >= PROCESS_EVERY_NTH_LOCUS ) ) {
|
||||||
numUnprocessed = 0; // Reset the counter because we are processing this very locus
|
numUnprocessed = 0; // Reset the counter because we are processing this very locus
|
||||||
|
|
||||||
SAMRecord read;
|
GATKSAMRecord gatkRead;
|
||||||
int offset;
|
int offset;
|
||||||
byte refBase;
|
byte refBase;
|
||||||
byte[] bases;
|
byte[] bases;
|
||||||
|
|
||||||
// For each read at this locus
|
// For each read at this locus
|
||||||
for( PileupElement p : context.getBasePileup() ) {
|
for( PileupElement p : context.getBasePileup() ) {
|
||||||
read = p.getRead();
|
gatkRead = (GATKSAMRecord) p.getRead();
|
||||||
offset = p.getOffset();
|
offset = p.getOffset();
|
||||||
|
|
||||||
RecalDataManager.parseSAMRecord( read, RAC );
|
if( gatkRead.containsTemporaryAttribute( SKIP_RECORD_ATTRIBUTE ) ) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// Skip over reads with no calls in the color space if the user requested it
|
if( !gatkRead.containsTemporaryAttribute( SEEN_ATTRIBUTE ) )
|
||||||
if( !RAC.IGNORE_NOCALL_COLORSPACE || !RecalDataManager.checkNoCallColorSpace( read ) ) {
|
{
|
||||||
RecalDataManager.parseColorSpace( read );
|
gatkRead.setTemporaryAttribute( SEEN_ATTRIBUTE, true );
|
||||||
|
RecalDataManager.parseSAMRecord( gatkRead, RAC );
|
||||||
|
|
||||||
// Skip if base quality is zero
|
// Skip over reads with no calls in the color space if the user requested it
|
||||||
if( read.getBaseQualities()[offset] > 0 ) {
|
if( RAC.IGNORE_NOCALL_COLORSPACE && RecalDataManager.checkNoCallColorSpace( gatkRead ) ) {
|
||||||
|
gatkRead.setTemporaryAttribute( SKIP_RECORD_ATTRIBUTE, true);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
bases = read.getReadBases();
|
RecalDataManager.parseColorSpace( gatkRead );
|
||||||
refBase = (byte)ref.getBase();
|
gatkRead.setTemporaryAttribute( COVARS_ATTRIBUTE,
|
||||||
|
RecalDataManager.computeCovariates( gatkRead, requestedCovariates ));
|
||||||
|
}
|
||||||
|
|
||||||
// Skip if this base is an 'N' or etc.
|
|
||||||
if( BaseUtils.isRegularBase( (char)(bases[offset]) ) ) {
|
|
||||||
|
|
||||||
// SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it
|
// Skip this position if base quality is zero
|
||||||
if( !read.getReadGroup().getPlatform().toUpperCase().contains("SOLID") || RAC.SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || !RecalDataManager.isInconsistentColorSpace( read, offset ) ) {
|
if( gatkRead.getBaseQualities()[offset] > 0 ) {
|
||||||
|
|
||||||
// This base finally passed all the checks for a good base, so add it to the big data hashmap
|
bases = gatkRead.getReadBases();
|
||||||
updateDataFromRead( read, offset, refBase );
|
refBase = (byte)ref.getBase();
|
||||||
|
|
||||||
} else { // calculate SOLID reference insertion rate
|
// Skip if this base is an 'N' or etc.
|
||||||
if( ref.getBase() == (char)bases[offset] ) {
|
if( BaseUtils.isRegularBase( (char)(bases[offset]) ) ) {
|
||||||
solidInsertedReferenceBases++;
|
|
||||||
} else {
|
// SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it
|
||||||
otherColorSpaceInconsistency++;
|
if( !gatkRead.getReadGroup().getPlatform().toUpperCase().contains("SOLID") || RAC.SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || !RecalDataManager.isInconsistentColorSpace( gatkRead, offset ) ) {
|
||||||
}
|
|
||||||
|
// This base finally passed all the checks for a good base, so add it to the big data hashmap
|
||||||
|
updateDataFromRead( gatkRead, offset, refBase );
|
||||||
|
|
||||||
|
} else { // calculate SOLID reference insertion rate
|
||||||
|
if( refBase == (char)bases[offset] ) {
|
||||||
|
solidInsertedReferenceBases++;
|
||||||
|
} else {
|
||||||
|
otherColorSpaceInconsistency++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -311,6 +344,8 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
||||||
return 1; // This value isn't actually used anywhere
|
return 1; // This value isn't actually used anywhere
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Update the mismatch / total_base counts for a given class of loci.
|
* Update the mismatch / total_base counts for a given class of loci.
|
||||||
*
|
*
|
||||||
|
|
@ -358,29 +393,26 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
||||||
* adding one to the number of observations and potentially one to the number of mismatches
|
* adding one to the number of observations and potentially one to the number of mismatches
|
||||||
* Lots of things are passed as parameters to this method as a strategy for optimizing the covariate.getValue calls
|
* Lots of things are passed as parameters to this method as a strategy for optimizing the covariate.getValue calls
|
||||||
* because pulling things out of the SAMRecord is an expensive operation.
|
* because pulling things out of the SAMRecord is an expensive operation.
|
||||||
* @param read The SAMRecord holding all the data for this read
|
* @param gatkRead The SAMRecord holding all the data for this read
|
||||||
* @param offset The offset in the read for this locus
|
* @param offset The offset in the read for this locus
|
||||||
* @param refBase The reference base at this locus
|
* @param refBase The reference base at this locus
|
||||||
*/
|
*/
|
||||||
private void updateDataFromRead(final SAMRecord read, final int offset, final byte refBase) {
|
private void updateDataFromRead(final GATKSAMRecord gatkRead, final int offset, final byte refBase) {
|
||||||
|
|
||||||
final Object[] key = new Object[requestedCovariates.size()];
|
|
||||||
|
|
||||||
// Loop through the list of requested covariates and pick out the value from the read and offset
|
final Object[][] covars = (Comparable[][]) gatkRead.getTemporaryAttribute(COVARS_ATTRIBUTE);
|
||||||
int iii = 0;
|
final Object[] key = covars[offset];
|
||||||
for( Covariate covariate : requestedCovariates ) {
|
|
||||||
key[iii++] = covariate.getValue( read, offset );
|
|
||||||
}
|
|
||||||
|
|
||||||
// Using the list of covariate values as a key, pick out the RecalDatum from the data HashMap
|
// Using the list of covariate values as a key, pick out the RecalDatum from the data HashMap
|
||||||
RecalDatumOptimized datum = (RecalDatumOptimized) dataManager.data.get( key );
|
final NestedHashMap data = dataManager.data; //optimization - create local reference
|
||||||
|
RecalDatumOptimized datum = (RecalDatumOptimized) data.get( key );
|
||||||
if( datum == null ) { // key doesn't exist yet in the map so make a new bucket and add it
|
if( datum == null ) { // key doesn't exist yet in the map so make a new bucket and add it
|
||||||
datum = new RecalDatumOptimized(); // initialized with zeros, will be incremented at end of method
|
datum = new RecalDatumOptimized(); // initialized with zeros, will be incremented at end of method
|
||||||
dataManager.data.put( datum, (Object[])key );
|
data.put( datum, (Object[])key );
|
||||||
}
|
}
|
||||||
|
|
||||||
// Need the bases to determine whether or not we have a mismatch
|
// Need the bases to determine whether or not we have a mismatch
|
||||||
final byte base = read.getReadBases()[offset];
|
final byte base = gatkRead.getReadBases()[offset];
|
||||||
final long curMismatches = datum.getNumMismatches();
|
final long curMismatches = datum.getNumMismatches();
|
||||||
|
|
||||||
// Add one to the number of observations and potentially one to the number of mismatches
|
// Add one to the number of observations and potentially one to the number of mismatches
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,11 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 The Broad Institute
|
* Copyright (c) 2009 The Broad Institute
|
||||||
*
|
*
|
||||||
|
|
@ -72,9 +73,9 @@ public class CycleCovariate implements StandardCovariate {
|
||||||
if( read.getReadGroup().getPlatform().equalsIgnoreCase( "ILLUMINA" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "SLX" ) || // Some bams have "illumina" and others have "SLX"
|
if( read.getReadGroup().getPlatform().equalsIgnoreCase( "ILLUMINA" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "SLX" ) || // Some bams have "illumina" and others have "SLX"
|
||||||
read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "ABI_SOLID" )) { // Some bams have "solid" and others have "ABI_SOLID"
|
read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "ABI_SOLID" )) { // Some bams have "solid" and others have "ABI_SOLID"
|
||||||
cycle = offset + 1;
|
cycle = offset + 1;
|
||||||
if( read.getReadNegativeStrandFlag() ) {
|
if( read.getReadNegativeStrandFlag() ) {
|
||||||
cycle = read.getReadLength() - offset;
|
cycle = read.getReadLength() - offset;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//-----------------------------
|
//-----------------------------
|
||||||
|
|
@ -119,10 +120,10 @@ public class CycleCovariate implements StandardCovariate {
|
||||||
//else if( read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) ) {
|
//else if( read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) ) {
|
||||||
// // The ligation cycle according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf
|
// // The ligation cycle according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf
|
||||||
// int pos = offset + 1;
|
// int pos = offset + 1;
|
||||||
// if( read.getReadNegativeStrandFlag() ) {
|
// if( read.getReadNegativeStrandFlag() ) {
|
||||||
// pos = read.getReadLength() - offset;
|
// pos = read.getReadLength() - offset;
|
||||||
// }
|
// }
|
||||||
// cycle = pos / 5; // integer division
|
// cycle = pos / 5; // integer division
|
||||||
//}
|
//}
|
||||||
|
|
||||||
//-----------------------------
|
//-----------------------------
|
||||||
|
|
@ -158,6 +159,106 @@ public class CycleCovariate implements StandardCovariate {
|
||||||
return cycle;
|
return cycle;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void getValues(SAMRecord read, Comparable[] comparable) {
|
||||||
|
|
||||||
|
//-----------------------------
|
||||||
|
// ILLUMINA and SOLID
|
||||||
|
//-----------------------------
|
||||||
|
|
||||||
|
if( read.getReadGroup().getPlatform().equalsIgnoreCase( "ILLUMINA" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "SLX" ) || // Some bams have "illumina" and others have "SLX"
|
||||||
|
read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "ABI_SOLID" )) { // Some bams have "solid" and others have "ABI_SOLID"
|
||||||
|
|
||||||
|
final int init;
|
||||||
|
final int increment;
|
||||||
|
if( !read.getReadNegativeStrandFlag() ) {
|
||||||
|
// Differentiate between first and second of pair.
|
||||||
|
// The sequencing machine cycle keeps incrementing for the second read in a pair. So it is possible for a read group
|
||||||
|
// to have an error affecting quality at a particular cycle on the first of pair which carries over to the second of pair.
|
||||||
|
// Therefore the cycle covariate must differentiate between first and second of pair reads.
|
||||||
|
// This effect can not be corrected by pulling out the first of pair and second of pair flags into a separate covariate because
|
||||||
|
// the current sequential model would consider the effects independently instead of jointly.
|
||||||
|
if( read.getReadPairedFlag() && read.getSecondOfPairFlag() ) {
|
||||||
|
//second of pair, positive strand
|
||||||
|
init = -1;
|
||||||
|
increment = -1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
//first of pair, positive strand
|
||||||
|
init = 1;
|
||||||
|
increment = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
if( read.getReadPairedFlag() && read.getSecondOfPairFlag() ) {
|
||||||
|
//second of pair, negative strand
|
||||||
|
init = -read.getReadLength();
|
||||||
|
increment = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
//first of pair, negative strand
|
||||||
|
init = read.getReadLength();
|
||||||
|
increment = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int cycle = init;
|
||||||
|
for(int i = 0; i < read.getReadLength(); i++) {
|
||||||
|
comparable[i] = cycle;
|
||||||
|
cycle += increment;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if( read.getReadGroup().getPlatform().contains( "454" ) ) { // Some bams have "LS454" and others have just "454"
|
||||||
|
|
||||||
|
final int readLength = read.getReadLength();
|
||||||
|
final byte[] bases = read.getReadBases();
|
||||||
|
|
||||||
|
// Differentiate between first and second of pair.
|
||||||
|
// The sequencing machine cycle keeps incrementing for the second read in a pair. So it is possible for a read group
|
||||||
|
// to have an error affecting quality at a particular cycle on the first of pair which carries over to the second of pair.
|
||||||
|
// Therefore the cycle covariate must differentiate between first and second of pair reads.
|
||||||
|
// This effect can not be corrected by pulling out the first of pair and second of pair flags into a separate covariate because
|
||||||
|
// the current sequential model would consider the effects independently instead of jointly.
|
||||||
|
final boolean multiplyByNegative1 = read.getReadPairedFlag() && read.getSecondOfPairFlag();
|
||||||
|
|
||||||
|
int cycle = multiplyByNegative1 ? -1 : 1;
|
||||||
|
|
||||||
|
// BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change
|
||||||
|
// For example, AAAAAAA was probably read in two flow cycles but here we count it as one
|
||||||
|
if( !read.getReadNegativeStrandFlag() ) { // Forward direction
|
||||||
|
int iii = 0;
|
||||||
|
while( iii < readLength )
|
||||||
|
{
|
||||||
|
while( iii < readLength && bases[iii] == (byte)'T' ) { comparable[iii] = cycle; iii++; }
|
||||||
|
while( iii < readLength && bases[iii] == (byte)'A' ) { comparable[iii] = cycle; iii++; }
|
||||||
|
while( iii < readLength && bases[iii] == (byte)'C' ) { comparable[iii] = cycle; iii++; }
|
||||||
|
while( iii < readLength && bases[iii] == (byte)'G' ) { comparable[iii] = cycle; iii++; }
|
||||||
|
if( iii < readLength ) { if (multiplyByNegative1) cycle--; else cycle++; }
|
||||||
|
if( iii < readLength && !BaseUtils.isRegularBase(bases[iii]) ) { comparable[iii] = cycle; iii++; }
|
||||||
|
|
||||||
|
}
|
||||||
|
} else { // Negative direction
|
||||||
|
int iii = readLength-1;
|
||||||
|
while( iii >= 0 )
|
||||||
|
{
|
||||||
|
while( iii >= 0 && bases[iii] == (byte)'T' ) { comparable[iii] = cycle; iii--; }
|
||||||
|
while( iii >= 0 && bases[iii] == (byte)'A' ) { comparable[iii] = cycle; iii--; }
|
||||||
|
while( iii >= 0 && bases[iii] == (byte)'C' ) { comparable[iii] = cycle; iii--; }
|
||||||
|
while( iii >= 0 && bases[iii] == (byte)'G' ) { comparable[iii] = cycle; iii--; }
|
||||||
|
if( iii >= 0 ) { if (multiplyByNegative1) cycle--; else cycle++; }
|
||||||
|
if( iii >= 0 && !BaseUtils.isRegularBase(bases[iii]) ) { comparable[iii] = cycle; iii--; }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
throw new IllegalStateException("This method hasn't been implemented yet for " + read.getReadGroup().getPlatform());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
||||||
public final Comparable getValue( final String str ) {
|
public final Comparable getValue( final String str ) {
|
||||||
return Integer.parseInt( str );
|
return Integer.parseInt( str );
|
||||||
|
|
|
||||||
|
|
@ -61,7 +61,7 @@ public class Dinuc implements Comparable<Dinuc>{
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int hashBytes(final byte byte1, final byte byte2) {
|
public static int hashBytes(final byte byte1, final byte byte2) {
|
||||||
return byte1 << 16 + byte2 << 4;
|
return byte1 << 8 + byte2;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString() { // This method call is how the Dinuc will get written out to the table recalibration file
|
public String toString() { // This method call is how the Dinuc will get written out to the table recalibration file
|
||||||
|
|
|
||||||
|
|
@ -2,9 +2,10 @@ package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||||
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 The Broad Institute
|
* Copyright (c) 2009 The Broad Institute
|
||||||
*
|
*
|
||||||
|
|
@ -45,7 +46,7 @@ public class DinucCovariate implements StandardCovariate {
|
||||||
private static final byte NO_CALL = (byte)'N';
|
private static final byte NO_CALL = (byte)'N';
|
||||||
private static final Dinuc NO_DINUC = new Dinuc(NO_CALL, NO_CALL);
|
private static final Dinuc NO_DINUC = new Dinuc(NO_CALL, NO_CALL);
|
||||||
|
|
||||||
HashMap<Integer, Dinuc> dinucHashMap;
|
private HashMap<Integer, Dinuc> dinucHashMap;
|
||||||
|
|
||||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||||
public void initialize( final RecalibrationArgumentCollection RAC ) {
|
public void initialize( final RecalibrationArgumentCollection RAC ) {
|
||||||
|
|
@ -93,6 +94,45 @@ public class DinucCovariate implements StandardCovariate {
|
||||||
return dinucHashMap.get( Dinuc.hashBytes( prevBase, base ) );
|
return dinucHashMap.get( Dinuc.hashBytes( prevBase, base ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Takes an array of size (at least) read.getReadLength() and fills it with the covariate values for each position in the read.
|
||||||
|
*/
|
||||||
|
public void getValues( SAMRecord read, Comparable[] result ) {
|
||||||
|
final HashMap<Integer, Dinuc> dinucHashMapRef = this.dinucHashMap; //optimize access to dinucHashMap
|
||||||
|
final int readLength = read.getReadLength();
|
||||||
|
final boolean negativeStrand = read.getReadNegativeStrandFlag();
|
||||||
|
byte[] bases = read.getReadBases();
|
||||||
|
byte base;
|
||||||
|
byte prevBase;
|
||||||
|
int offset = 0;
|
||||||
|
// If this is a negative strand read then we need to reverse the direction for our previous base
|
||||||
|
|
||||||
|
if(negativeStrand) {
|
||||||
|
bases = BaseUtils.simpleReverseComplement(bases); //this is NOT in-place
|
||||||
|
}
|
||||||
|
result[0] = NO_DINUC; // No dinuc at the beginning of the read
|
||||||
|
|
||||||
|
prevBase = bases[0];
|
||||||
|
offset++;
|
||||||
|
while(offset < readLength) {
|
||||||
|
// Note: We are using the previous base in the read, not the
|
||||||
|
// previous base in the reference. This is done in part to be consistent with unmapped reads.
|
||||||
|
base = bases[offset];
|
||||||
|
if( BaseUtils.isRegularBase( prevBase ) ) {
|
||||||
|
result[offset] = dinucHashMapRef.get( Dinuc.hashBytes( prevBase, base ) );
|
||||||
|
} else {
|
||||||
|
result[offset] = NO_DINUC;
|
||||||
|
}
|
||||||
|
|
||||||
|
offset++;
|
||||||
|
prevBase = base;
|
||||||
|
}
|
||||||
|
if(negativeStrand) {
|
||||||
|
reverse( result );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
||||||
public final Comparable getValue( final String str ) {
|
public final Comparable getValue( final String str ) {
|
||||||
final Dinuc returnDinuc = dinucHashMap.get( Dinuc.hashBytes( (byte)str.charAt(0), (byte)str.charAt(1) ) );
|
final Dinuc returnDinuc = dinucHashMap.get( Dinuc.hashBytes( (byte)str.charAt(0), (byte)str.charAt(1) ) );
|
||||||
|
|
@ -100,7 +140,20 @@ public class DinucCovariate implements StandardCovariate {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
return returnDinuc;
|
return returnDinuc;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reverses the given array in place.
|
||||||
|
*
|
||||||
|
* @param array
|
||||||
|
*/
|
||||||
|
private static final void reverse(final Comparable[] array) {
|
||||||
|
final int arrayLength = array.length;
|
||||||
|
for(int l = 0, r = arrayLength - 1; l < r; l++, r--) {
|
||||||
|
final Comparable temp = array[l];
|
||||||
|
array[l] = array[r];
|
||||||
|
array[r] = temp;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -76,9 +76,15 @@ public class GCContentCovariate implements ExperimentalCovariate {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void getValues(SAMRecord read, Comparable[] comparable) {
|
||||||
|
throw new IllegalStateException("Not yet implemented");
|
||||||
|
}
|
||||||
|
|
||||||
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
||||||
public final Comparable getValue( final String str ) {
|
public final Comparable getValue( final String str ) {
|
||||||
return Integer.parseInt( str );
|
return Integer.parseInt( str );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -90,6 +90,10 @@ public class HomopolymerCovariate implements ExperimentalCovariate {
|
||||||
return numAgree;
|
return numAgree;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void getValues(SAMRecord read, Comparable[] comparable) {
|
||||||
|
throw new IllegalStateException("Not yet implemented");
|
||||||
|
}
|
||||||
|
|
||||||
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
||||||
public final Comparable getValue( final String str ) {
|
public final Comparable getValue( final String str ) {
|
||||||
return Integer.parseInt( str );
|
return Integer.parseInt( str );
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,7 @@ public class MappingQualityCovariate implements ExperimentalCovariate {
|
||||||
|
|
||||||
// Used to pick out the covariate's value from attributes of the read
|
// Used to pick out the covariate's value from attributes of the read
|
||||||
public final Comparable getValue( final SAMRecord read, final int offset ) {
|
public final Comparable getValue( final SAMRecord read, final int offset ) {
|
||||||
return read.getMappingQuality();
|
return read.getMappingQuality();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
||||||
|
|
@ -51,4 +51,8 @@ public class MappingQualityCovariate implements ExperimentalCovariate {
|
||||||
return Integer.parseInt( str );
|
return Integer.parseInt( str );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void getValues(SAMRecord read, Comparable[] comparable) {
|
||||||
|
throw new IllegalStateException("Not yet implemented");
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -48,7 +48,7 @@ public class MinimumNQSCovariate implements ExperimentalCovariate {
|
||||||
// Used to pick out the covariate's value from attributes of the read
|
// Used to pick out the covariate's value from attributes of the read
|
||||||
public final Comparable getValue( final SAMRecord read, final int offset ) {
|
public final Comparable getValue( final SAMRecord read, final int offset ) {
|
||||||
|
|
||||||
// Loop over the list of base quality scores in the window and find the minimum
|
// Loop over the list of base quality scores in the window and find the minimum
|
||||||
final byte[] quals = read.getBaseQualities();
|
final byte[] quals = read.getBaseQualities();
|
||||||
int minQual = quals[offset];
|
int minQual = quals[offset];
|
||||||
final int minIndex = Math.max(offset - windowReach, 0);
|
final int minIndex = Math.max(offset - windowReach, 0);
|
||||||
|
|
@ -66,4 +66,7 @@ public class MinimumNQSCovariate implements ExperimentalCovariate {
|
||||||
return Integer.parseInt( str );
|
return Integer.parseInt( str );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void getValues(SAMRecord read, Comparable[] comparable) {
|
||||||
|
throw new IllegalStateException("Not yet implemented");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -56,4 +56,8 @@ public class PositionCovariate implements ExperimentalCovariate {
|
||||||
return Integer.parseInt( str );
|
return Integer.parseInt( str );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void getValues(SAMRecord read, Comparable[] comparable) {
|
||||||
|
throw new IllegalStateException("Not yet implemented");
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -47,10 +47,10 @@ public class PrimerRoundCovariate implements ExperimentalCovariate {
|
||||||
public final Comparable getValue( final SAMRecord read, final int offset ) {
|
public final Comparable getValue( final SAMRecord read, final int offset ) {
|
||||||
if( read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) ) {
|
if( read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) ) {
|
||||||
int pos = offset;
|
int pos = offset;
|
||||||
if( read.getReadNegativeStrandFlag() ) {
|
if( read.getReadNegativeStrandFlag() ) {
|
||||||
pos = read.getReadLength() - (offset + 1);
|
pos = read.getReadLength() - (offset + 1);
|
||||||
}
|
}
|
||||||
return pos % 5; // the primer round according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf
|
return pos % 5; // the primer round according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf
|
||||||
} else {
|
} else {
|
||||||
return 1; // nothing to do here because it is always the same
|
return 1; // nothing to do here because it is always the same
|
||||||
}
|
}
|
||||||
|
|
@ -62,4 +62,7 @@ public class PrimerRoundCovariate implements ExperimentalCovariate {
|
||||||
return Integer.parseInt( str );
|
return Integer.parseInt( str );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void getValues(SAMRecord read, Comparable[] comparable) {
|
||||||
|
throw new IllegalStateException("Not yet implemented");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -43,7 +43,14 @@ public class QualityScoreCovariate implements RequiredCovariate {
|
||||||
|
|
||||||
// Used to pick out the covariate's value from attributes of the read
|
// Used to pick out the covariate's value from attributes of the read
|
||||||
public final Comparable getValue( final SAMRecord read, final int offset ) {
|
public final Comparable getValue( final SAMRecord read, final int offset ) {
|
||||||
return (int)(read.getBaseQualities()[offset]);
|
return (int)(read.getBaseQualities()[offset]);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void getValues(SAMRecord read, Comparable[] comparable) {
|
||||||
|
byte[] baseQualities = read.getBaseQualities();
|
||||||
|
for(int i = 0; i < read.getReadLength(); i++) {
|
||||||
|
comparable[i] = (int) baseQualities[i];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
||||||
|
|
|
||||||
|
|
@ -45,12 +45,19 @@ public class ReadGroupCovariate implements RequiredCovariate{
|
||||||
|
|
||||||
// Used to pick out the covariate's value from attributes of the read
|
// Used to pick out the covariate's value from attributes of the read
|
||||||
public final Comparable getValue( final SAMRecord read, final int offset ) {
|
public final Comparable getValue( final SAMRecord read, final int offset ) {
|
||||||
return read.getReadGroup().getReadGroupId();
|
return read.getReadGroup().getReadGroupId();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void getValues(SAMRecord read, Comparable[] comparable) {
|
||||||
|
final String readGroupId = read.getReadGroup().getReadGroupId();
|
||||||
|
for(int i = 0; i < read.getReadLength(); i++) {
|
||||||
|
comparable[i] = readGroupId;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
||||||
public final Comparable getValue( final String str ) {
|
public final Comparable getValue( final String str ) {
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -58,14 +58,14 @@ public class RecalDataManager {
|
||||||
private static boolean warnUserNullPlatform = false;
|
private static boolean warnUserNullPlatform = false;
|
||||||
|
|
||||||
RecalDataManager() {
|
RecalDataManager() {
|
||||||
data = new NestedHashMap();
|
data = new NestedHashMap();
|
||||||
dataCollapsedReadGroup = null;
|
dataCollapsedReadGroup = null;
|
||||||
dataCollapsedQualityScore = null;
|
dataCollapsedQualityScore = null;
|
||||||
dataCollapsedByCovariate = null;
|
dataCollapsedByCovariate = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
RecalDataManager( final boolean createCollapsedTables, final int numCovariates ) {
|
RecalDataManager( final boolean createCollapsedTables, final int numCovariates ) {
|
||||||
if( createCollapsedTables ) { // Initialize all the collapsed tables, only used by TableRecalibrationWalker
|
if( createCollapsedTables ) { // Initialize all the collapsed tables, only used by TableRecalibrationWalker
|
||||||
data = null;
|
data = null;
|
||||||
dataCollapsedReadGroup = new NestedHashMap();
|
dataCollapsedReadGroup = new NestedHashMap();
|
||||||
dataCollapsedQualityScore = new NestedHashMap();
|
dataCollapsedQualityScore = new NestedHashMap();
|
||||||
|
|
@ -547,4 +547,36 @@ public class RecalDataManager {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Computes all requested covariates for every offset in the given read
|
||||||
|
* by calling covariate.getValues(..).
|
||||||
|
*
|
||||||
|
* @param gatkRead The read for which to compute covariate values.
|
||||||
|
* @param requestedCovariates The list of requested covariates.
|
||||||
|
* @return An array of covariate values where result[i][j] is the covariate
|
||||||
|
* value for the ith position in the read and the jth covariate in
|
||||||
|
* reqeustedCovariates list.
|
||||||
|
*/
|
||||||
|
public static Comparable[][] computeCovariates(final GATKSAMRecord gatkRead, final List<Covariate> requestedCovariates) {
|
||||||
|
//compute all covariates for this read
|
||||||
|
final List<Covariate> requestedCovariatesRef = requestedCovariates;
|
||||||
|
final int numRequestedCovariates = requestedCovariatesRef.size();
|
||||||
|
final int readLength = gatkRead.getReadLength();
|
||||||
|
|
||||||
|
final Comparable[][] covariateValues_offset_x_covar = new Comparable[readLength][numRequestedCovariates];
|
||||||
|
final Comparable[] tempCovariateValuesHolder = new Comparable[readLength];
|
||||||
|
|
||||||
|
// Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
|
||||||
|
for( int i = 0; i < numRequestedCovariates; i++ ) {
|
||||||
|
requestedCovariatesRef.get(i).getValues( gatkRead, tempCovariateValuesHolder );
|
||||||
|
for(int j = 0; j < readLength; j++) {
|
||||||
|
//copy values into a 2D array that allows all covar types to be extracted at once for
|
||||||
|
//an offset j by doing covariateValues_offset_x_covar[j]. This avoids the need to later iterate over covar types.
|
||||||
|
covariateValues_offset_x_covar[j][i] = tempCovariateValuesHolder[j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return covariateValues_offset_x_covar;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,24 +1,41 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||||
|
|
||||||
import net.sf.samtools.*;
|
import java.io.File;
|
||||||
import net.sf.samtools.util.SequenceUtil;
|
import java.io.FileNotFoundException;
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
import java.lang.reflect.Field;
|
||||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
|
||||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
|
||||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
|
||||||
import org.broadinstitute.sting.utils.cmdLine.*;
|
|
||||||
import org.broadinstitute.sting.utils.*;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
import java.util.ResourceBundle;
|
import java.util.ResourceBundle;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileNotFoundException;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
import java.lang.reflect.Field;
|
import net.sf.samtools.SAMFileWriter;
|
||||||
|
import net.sf.samtools.SAMProgramRecord;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import net.sf.samtools.SAMTag;
|
||||||
|
import net.sf.samtools.util.SequenceUtil;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||||
|
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||||
|
import org.broadinstitute.sting.utils.JVMUtils;
|
||||||
|
import org.broadinstitute.sting.utils.NestedHashMap;
|
||||||
|
import org.broadinstitute.sting.utils.PackageUtils;
|
||||||
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.TextFormattingUtils;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.xReadLines;
|
||||||
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
import org.broadinstitute.sting.utils.cmdLine.ArgumentCollection;
|
||||||
|
import org.broadinstitute.sting.utils.cmdLine.ArgumentDefinition;
|
||||||
|
import org.broadinstitute.sting.utils.cmdLine.ArgumentSource;
|
||||||
|
import org.broadinstitute.sting.utils.cmdLine.ArgumentTypeDescriptor;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 The Broad Institute
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
|
@ -33,7 +50,7 @@ import java.lang.reflect.Field;
|
||||||
* conditions:
|
* conditions:
|
||||||
*
|
*
|
||||||
* The above copyright notice and this permission notice shall be
|
* The above copyright notice and this permission notice shall be
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
*
|
*
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
|
@ -100,6 +117,12 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
private final Random coinFlip = new Random( RANDOM_SEED ); // Random number generator is used to remove reference bias in solid bams
|
private final Random coinFlip = new Random( RANDOM_SEED ); // Random number generator is used to remove reference bias in solid bams
|
||||||
private long numReadsWithMalformedColorSpace = 0;
|
private long numReadsWithMalformedColorSpace = 0;
|
||||||
|
|
||||||
|
/////////////////////////////
|
||||||
|
// Optimization
|
||||||
|
/////////////////////////////
|
||||||
|
private NestedHashMap qualityScoreByFullCovariateKey = new NestedHashMap(); // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
|
||||||
|
|
||||||
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// initialize
|
// initialize
|
||||||
|
|
@ -277,7 +300,6 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
|
final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
|
||||||
// Add that datum to all the collapsed tables which will be used in the sequential calculation
|
// Add that datum to all the collapsed tables which will be used in the sequential calculation
|
||||||
dataManager.addToAllTables( key, datum, PRESERVE_QSCORES_LESS_THAN );
|
dataManager.addToAllTables( key, datum, PRESERVE_QSCORES_LESS_THAN );
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
|
|
@ -311,19 +333,23 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
originalQuals = RecalDataManager.calcColorSpace( read, originalQuals, RAC.SOLID_RECAL_MODE, coinFlip, refBases );
|
originalQuals = RecalDataManager.calcColorSpace( read, originalQuals, RAC.SOLID_RECAL_MODE, coinFlip, refBases );
|
||||||
}
|
}
|
||||||
|
|
||||||
final Object[] fullCovariateKey = new Object[requestedCovariates.size()];
|
//compute all covariate values for this read
|
||||||
|
final Comparable[][] covariateValues_offset_x_covar =
|
||||||
|
RecalDataManager.computeCovariates((GATKSAMRecord) read, requestedCovariates);
|
||||||
|
|
||||||
// For each base in the read
|
// For each base in the read
|
||||||
final int readLength = read.getReadLength();
|
for( int offset = 0; offset < read.getReadLength(); offset++ ) {
|
||||||
for( int offset = 0; offset < readLength; offset++ ) {
|
|
||||||
|
|
||||||
// Loop through the list of requested covariates and pick out the value from the read and offset
|
final Object[] fullCovariateKey = covariateValues_offset_x_covar[offset];
|
||||||
int iii = 0;
|
|
||||||
for( Covariate covariate : requestedCovariates ) {
|
Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKey);
|
||||||
fullCovariateKey[iii++] = covariate.getValue( read, offset );
|
if(qualityScore == null)
|
||||||
|
{
|
||||||
|
qualityScore = performSequentialQualityCalculation( fullCovariateKey );
|
||||||
|
qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKey);
|
||||||
}
|
}
|
||||||
|
|
||||||
recalQuals[offset] = performSequentialQualityCalculation( fullCovariateKey );
|
recalQuals[offset] = qualityScore;
|
||||||
}
|
}
|
||||||
|
|
||||||
preserveQScores( originalQuals, recalQuals ); // Overwrite the work done if original quality score is too low
|
preserveQScores( originalQuals, recalQuals ); // Overwrite the work done if original quality score is too low
|
||||||
|
|
|
||||||
|
|
@ -59,4 +59,10 @@ public class TileCovariate implements ExperimentalCovariate {
|
||||||
return Integer.parseInt( str );
|
return Integer.parseInt( str );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void getValues(SAMRecord read, Comparable[] comparable) {
|
||||||
|
final Comparable tileCovariateValue = getValue(read, 0);
|
||||||
|
for(int i = 0; i < read.getReadLength(); i++) {
|
||||||
|
comparable[i] = tileCovariateValue;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -39,8 +39,9 @@ public class NestedHashMap{
|
||||||
|
|
||||||
public Object get( final Object... keys ) {
|
public Object get( final Object... keys ) {
|
||||||
Map map = this.data;
|
Map map = this.data;
|
||||||
for( int iii = 0; iii < keys.length; iii++ ) {
|
final int keysLength = keys.length;
|
||||||
if( iii == keys.length - 1 ) {
|
for( int iii = 0; iii < keysLength; iii++ ) {
|
||||||
|
if( iii == keysLength - 1 ) {
|
||||||
return map.get(keys[iii]);
|
return map.get(keys[iii]);
|
||||||
} else {
|
} else {
|
||||||
map = (Map) map.get(keys[iii]);
|
map = (Map) map.get(keys[iii]);
|
||||||
|
|
@ -54,8 +55,9 @@ public class NestedHashMap{
|
||||||
public void put( final Object value, final Object... keys ) {
|
public void put( final Object value, final Object... keys ) {
|
||||||
|
|
||||||
Map map = this.data;
|
Map map = this.data;
|
||||||
for( int iii = 0; iii < keys.length; iii++ ) {
|
final int keysLength = keys.length;
|
||||||
if( iii == keys.length - 1 ) {
|
for( int iii = 0; iii < keysLength; iii++ ) {
|
||||||
|
if( iii == keysLength - 1 ) {
|
||||||
map.put(keys[iii], value);
|
map.put(keys[iii], value);
|
||||||
} else {
|
} else {
|
||||||
Map tmp = (Map) map.get(keys[iii]);
|
Map tmp = (Map) map.get(keys[iii]);
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,15 @@
|
||||||
package org.broadinstitute.sting.utils.sam;
|
package org.broadinstitute.sting.utils.sam;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.AlignmentBlock;
|
||||||
|
import net.sf.samtools.Cigar;
|
||||||
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
import net.sf.samtools.SAMReadGroupRecord;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import net.sf.samtools.SAMValidationError;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author ebanks
|
* @author ebanks
|
||||||
|
|
@ -19,23 +26,33 @@ import net.sf.samtools.*;
|
||||||
public class GATKSAMRecord extends SAMRecord {
|
public class GATKSAMRecord extends SAMRecord {
|
||||||
|
|
||||||
// the underlying SAMRecord which we are wrapping
|
// the underlying SAMRecord which we are wrapping
|
||||||
private SAMRecord mRecord;
|
private final SAMRecord mRecord;
|
||||||
|
|
||||||
// the SAMRecord data we're caching
|
// the SAMRecord data we're caching
|
||||||
private String mReadString = null;
|
private String mReadString = null;
|
||||||
private SAMReadGroupRecord mReadGroup = null;
|
private SAMReadGroupRecord mReadGroup = null;
|
||||||
private Boolean mNegativeStrandFlag = null;
|
private boolean mNegativeStrandFlag;
|
||||||
private Boolean mUnmappedFlag = null;
|
private boolean mUnmappedFlag;
|
||||||
|
private Boolean mSecondOfPairFlag = null;
|
||||||
|
|
||||||
// because some values can be null, we don't want to duplicate effort
|
// because some values can be null, we don't want to duplicate effort
|
||||||
private boolean retrievedReadGroup = false;
|
private boolean retrievedReadGroup = false;
|
||||||
|
|
||||||
|
// These temporary attributes were added here to make life easier for
|
||||||
|
// certain algorithms by providing a way to label or attach arbitrary data to
|
||||||
|
// individual GATKSAMRecords.
|
||||||
|
// These attributes exist in memory only, and are never written to disk.
|
||||||
|
private Map<Object, Object> temporaryAttributes;
|
||||||
|
|
||||||
public GATKSAMRecord(SAMRecord record) {
|
public GATKSAMRecord(SAMRecord record) {
|
||||||
super(null); // it doesn't matter - this isn't used
|
super(null); // it doesn't matter - this isn't used
|
||||||
if ( record == null )
|
if ( record == null )
|
||||||
throw new IllegalArgumentException("The SAMRecord argument cannot be null");
|
throw new IllegalArgumentException("The SAMRecord argument cannot be null");
|
||||||
mRecord = record;
|
mRecord = record;
|
||||||
|
|
||||||
|
mNegativeStrandFlag = mRecord.getReadNegativeStrandFlag();
|
||||||
|
mUnmappedFlag = mRecord.getReadUnmappedFlag();
|
||||||
|
|
||||||
// because attribute methods are declared to be final (and we can't overload them),
|
// because attribute methods are declared to be final (and we can't overload them),
|
||||||
// we need to actually set all of the attributes here
|
// we need to actually set all of the attributes here
|
||||||
|
|
||||||
|
|
@ -79,8 +96,6 @@ public class GATKSAMRecord extends SAMRecord {
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean getReadUnmappedFlag() {
|
public boolean getReadUnmappedFlag() {
|
||||||
if ( mUnmappedFlag == null )
|
|
||||||
mUnmappedFlag = mRecord.getReadUnmappedFlag();
|
|
||||||
return mUnmappedFlag;
|
return mUnmappedFlag;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -90,8 +105,6 @@ public class GATKSAMRecord extends SAMRecord {
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean getReadNegativeStrandFlag() {
|
public boolean getReadNegativeStrandFlag() {
|
||||||
if ( mNegativeStrandFlag == null )
|
|
||||||
mNegativeStrandFlag = mRecord.getReadNegativeStrandFlag();
|
|
||||||
return mNegativeStrandFlag;
|
return mNegativeStrandFlag;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -100,6 +113,90 @@ public class GATKSAMRecord extends SAMRecord {
|
||||||
mNegativeStrandFlag = b;
|
mNegativeStrandFlag = b;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean getSecondOfPairFlag() {
|
||||||
|
if( mSecondOfPairFlag == null ) {
|
||||||
|
//not done in constructor because this method can't be called for
|
||||||
|
//all SAMRecords.
|
||||||
|
mSecondOfPairFlag = mRecord.getSecondOfPairFlag();
|
||||||
|
}
|
||||||
|
return mSecondOfPairFlag;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSecondOfPairFlag(boolean b) {
|
||||||
|
mRecord.setSecondOfPairFlag(b);
|
||||||
|
mSecondOfPairFlag = b;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks whether an attribute has been set for the given key.
|
||||||
|
*
|
||||||
|
* Temporary attributes provide a way to label or attach arbitrary data to
|
||||||
|
* individual GATKSAMRecords. These attributes exist in memory only,
|
||||||
|
* and are never written to disk.
|
||||||
|
*
|
||||||
|
* @param key
|
||||||
|
* @return True if an attribute has been set for this key.
|
||||||
|
*/
|
||||||
|
public boolean containsTemporaryAttribute(Object key) {
|
||||||
|
if(temporaryAttributes != null) {
|
||||||
|
return temporaryAttributes.containsKey(key);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the key to the given value, replacing any previous value. The previous
|
||||||
|
* value is returned.
|
||||||
|
*
|
||||||
|
* Temporary attributes provide a way to label or attach arbitrary data to
|
||||||
|
* individual GATKSAMRecords. These attributes exist in memory only,
|
||||||
|
* and are never written to disk.
|
||||||
|
*
|
||||||
|
* @param key
|
||||||
|
* @param value
|
||||||
|
*/
|
||||||
|
public Object setTemporaryAttribute(Object key, Object value) {
|
||||||
|
if(temporaryAttributes == null) {
|
||||||
|
temporaryAttributes = new HashMap<Object, Object>();
|
||||||
|
}
|
||||||
|
return temporaryAttributes.put(key, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Looks up the value associated with the given key.
|
||||||
|
*
|
||||||
|
* Temporary attributes provide a way to label or attach arbitrary data to
|
||||||
|
* individual GATKSAMRecords. These attributes exist in memory only,
|
||||||
|
* and are never written to disk.
|
||||||
|
*
|
||||||
|
* @param key
|
||||||
|
* @return The value, or null.
|
||||||
|
*/
|
||||||
|
public Object getTemporaryAttribute(Object key) {
|
||||||
|
if(temporaryAttributes != null) {
|
||||||
|
return temporaryAttributes.get(key);
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Removes the attribute that has the given key.
|
||||||
|
*
|
||||||
|
* Temporary attributes provide a way to label or attach arbitrary data to
|
||||||
|
* individual GATKSAMRecords. These attributes exist in memory only,
|
||||||
|
* and are never written to disk.
|
||||||
|
*
|
||||||
|
* @param key
|
||||||
|
* @return The value that was associated with this key, or null.
|
||||||
|
*/
|
||||||
|
public Object removeTemporaryAttribute(Object key) {
|
||||||
|
if(temporaryAttributes != null) {
|
||||||
|
return temporaryAttributes.remove(key);
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////
|
||||||
// *** The following methods are final and cannot be overridden ***//
|
// *** The following methods are final and cannot be overridden ***//
|
||||||
/////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////
|
||||||
|
|
@ -209,8 +306,6 @@ public class GATKSAMRecord extends SAMRecord {
|
||||||
|
|
||||||
public boolean getFirstOfPairFlag() { return mRecord.getFirstOfPairFlag(); }
|
public boolean getFirstOfPairFlag() { return mRecord.getFirstOfPairFlag(); }
|
||||||
|
|
||||||
public boolean getSecondOfPairFlag() { return mRecord.getSecondOfPairFlag(); }
|
|
||||||
|
|
||||||
public boolean getNotPrimaryAlignmentFlag() { return mRecord.getNotPrimaryAlignmentFlag(); }
|
public boolean getNotPrimaryAlignmentFlag() { return mRecord.getNotPrimaryAlignmentFlag(); }
|
||||||
|
|
||||||
public boolean getReadFailsVendorQualityCheckFlag() { return mRecord.getReadFailsVendorQualityCheckFlag(); }
|
public boolean getReadFailsVendorQualityCheckFlag() { return mRecord.getReadFailsVendorQualityCheckFlag(); }
|
||||||
|
|
@ -227,8 +322,6 @@ public class GATKSAMRecord extends SAMRecord {
|
||||||
|
|
||||||
public void setFirstOfPairFlag(boolean b) { mRecord.setFirstOfPairFlag(b); }
|
public void setFirstOfPairFlag(boolean b) { mRecord.setFirstOfPairFlag(b); }
|
||||||
|
|
||||||
public void setSecondOfPairFlag(boolean b) { mRecord.setSecondOfPairFlag(b); }
|
|
||||||
|
|
||||||
public void setNotPrimaryAlignmentFlag(boolean b) { mRecord.setNotPrimaryAlignmentFlag(b); }
|
public void setNotPrimaryAlignmentFlag(boolean b) { mRecord.setNotPrimaryAlignmentFlag(b); }
|
||||||
|
|
||||||
public void setReadFailsVendorQualityCheckFlag(boolean b) { mRecord.setReadFailsVendorQualityCheckFlag(b); }
|
public void setReadFailsVendorQualityCheckFlag(boolean b) { mRecord.setReadFailsVendorQualityCheckFlag(b); }
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue