Store base insertion and base deletion qualities in the SAMRecord as a string of quality scores instead of an array of bytes. Start of a proper genotype-given-alleles mode in HaplotypeCaller.

This commit is contained in:
Ryan Poplin 2012-02-14 14:01:04 -05:00
parent 8742f5e36c
commit ae5b42c884
4 changed files with 13 additions and 12 deletions

View File

@ -58,6 +58,7 @@ public class RecalDataManager {
private final HashMap<BaseRecalibrationType, NestedHashMap> dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed private final HashMap<BaseRecalibrationType, NestedHashMap> dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed
private final HashMap<BaseRecalibrationType, ArrayList<NestedHashMap>> dataCollapsedByCovariate; // Tables where everything except read group, quality score, and given covariate has been collapsed private final HashMap<BaseRecalibrationType, ArrayList<NestedHashMap>> dataCollapsedByCovariate; // Tables where everything except read group, quality score, and given covariate has been collapsed
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores
public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams
public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color

View File

@ -109,6 +109,10 @@ public class Haplotype {
return isReference; return isReference;
} }
/**
 * Returns the result of {@code getBases()}.
 *
 * NOTE(review): the allele argument is not used in this body, so nothing is
 * actually inserted yet; this looks like a placeholder, so confirm the intended behavior.
 *
 * @param a the allele to insert (currently ignored)
 * @return whatever {@code getBases()} returns
 */
public byte[] insertAllele( final Allele a ) {
return getBases();
}
public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(List<Allele> alleleList, int startPos, ReferenceContext ref, public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(List<Allele> alleleList, int startPos, ReferenceContext ref,
final int haplotypeSize, final int numPrefBases) { final int haplotypeSize, final int numPrefBases) {

View File

@ -30,7 +30,6 @@ import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.utils.collections.NestedHashMap;
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.utils.text.XReadLines;
@ -189,13 +188,12 @@ public class BaseRecalibration {
for( int offset = 0; offset < read.getReadLength(); offset++ ) { for( int offset = 0; offset < read.getReadLength(); offset++ ) {
final Object[] fullCovariateKeyWithErrorMode = covariateKeySet.getKeySet(offset, errorModel); final Object[] fullCovariateKeyWithErrorMode = covariateKeySet.getKeySet(offset, errorModel);
final Object[] fullCovariateKey = Arrays.copyOfRange(fullCovariateKeyWithErrorMode, 0, fullCovariateKeyWithErrorMode.length-1); // need to strip off the error mode which was appended to the list of covariates final Object[] fullCovariateKey = Arrays.copyOfRange(fullCovariateKeyWithErrorMode, 0, fullCovariateKeyWithErrorMode.length-1); // need to strip off the error mode which was appended to the list of covariates
Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKey); Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKeyWithErrorMode);
if( qualityScore == null ) { if( qualityScore == null ) {
qualityScore = performSequentialQualityCalculation( errorModel, fullCovariateKey ); qualityScore = performSequentialQualityCalculation( errorModel, fullCovariateKey );
qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKey); qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKeyWithErrorMode);
} }
recalQuals[offset] = qualityScore; recalQuals[offset] = qualityScore;

View File

@ -171,10 +171,10 @@ public class GATKSAMRecord extends BAMRecord {
setBaseQualities(quals); setBaseQualities(quals);
break; break;
case BASE_INSERTION: case BASE_INSERTION:
setAttribute( GATKSAMRecord.BQSR_BASE_INSERTION_QUALITIES, quals ); setAttribute( GATKSAMRecord.BQSR_BASE_INSERTION_QUALITIES, SAMUtils.phredToFastq(quals) );
break; break;
case BASE_DELETION: case BASE_DELETION:
setAttribute( GATKSAMRecord.BQSR_BASE_DELETION_QUALITIES, quals ); setAttribute( GATKSAMRecord.BQSR_BASE_DELETION_QUALITIES, SAMUtils.phredToFastq(quals) );
break; break;
default: default:
throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel ); throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel );
@ -195,23 +195,23 @@ public class GATKSAMRecord extends BAMRecord {
} }
/**
 * Returns the base insertion qualities stored under BQSR_BASE_INSERTION_QUALITIES.
 * When none are found (quals == null), an array with the same length as
 * getBaseQualities() is filled with 45, stored back on the record, and returned.
 *
 * NOTE(review): each line below appears to hold the pre-change and post-change
 * text of the same statement side by side (diff rendering). Confirm before
 * treating it as compilable code.
 */
public byte[] getBaseInsertionQualities() { public byte[] getBaseInsertionQualities() {
byte[] quals = getByteArrayAttribute( BQSR_BASE_INSERTION_QUALITIES ); byte[] quals = SAMUtils.fastqToPhred( getStringAttribute( BQSR_BASE_INSERTION_QUALITIES ) );
if( quals == null ) { if( quals == null ) {
quals = new byte[getBaseQualities().length]; quals = new byte[getBaseQualities().length];
Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
// be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45 // be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
setAttribute( BQSR_BASE_INSERTION_QUALITIES, quals ); setBaseQualities(quals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION);
} }
return quals; return quals;
} }
/**
 * Returns the base deletion qualities stored under BQSR_BASE_DELETION_QUALITIES.
 * When none are found (quals == null), an array with the same length as
 * getBaseQualities() is filled with 45, stored back on the record, and returned.
 *
 * NOTE(review): each line below appears to hold the pre-change and post-change
 * text of the same statement side by side (diff rendering). Confirm before
 * treating it as compilable code.
 */
public byte[] getBaseDeletionQualities() { public byte[] getBaseDeletionQualities() {
byte[] quals = getByteArrayAttribute( BQSR_BASE_DELETION_QUALITIES ); byte[] quals = SAMUtils.fastqToPhred( getStringAttribute( BQSR_BASE_DELETION_QUALITIES ) );
if( quals == null ) { if( quals == null ) {
quals = new byte[getBaseQualities().length]; quals = new byte[getBaseQualities().length];
Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
// be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45 // be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
setAttribute( BQSR_BASE_DELETION_QUALITIES, quals ); setBaseQualities(quals, RecalDataManager.BaseRecalibrationType.BASE_DELETION);
} }
return quals; return quals;
} }
@ -259,12 +259,10 @@ public class GATKSAMRecord extends BAMRecord {
return (i==0) ? firstCount : (byte) Math.min(firstCount + offsetCount, Byte.MAX_VALUE); return (i==0) ? firstCount : (byte) Math.min(firstCount + offsetCount, Byte.MAX_VALUE);
} }
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// *** GATKSAMRecord specific methods ***// // *** GATKSAMRecord specific methods ***//
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
/** /**
* Checks whether an attribute has been set for the given key. * Checks whether an attribute has been set for the given key.
* *