From a7c306f75746dfe81db3ab079bd33058b22c083d Mon Sep 17 00:00:00 2001 From: ebanks Date: Wed, 16 Sep 2009 16:44:57 +0000 Subject: [PATCH] -deal with offsets that can be -1 -added option to have "D"s inserted for deleted bases in pileup strings git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1635 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/utils/BasicPileup.java | 36 ++++++++++++++++--- .../sting/utils/ReadBackedPileup.java | 30 +++++++++------- 2 files changed, 49 insertions(+), 17 deletions(-) diff --git a/java/src/org/broadinstitute/sting/utils/BasicPileup.java b/java/src/org/broadinstitute/sting/utils/BasicPileup.java index 5d439a228..bb2066465 100755 --- a/java/src/org/broadinstitute/sting/utils/BasicPileup.java +++ b/java/src/org/broadinstitute/sting/utils/BasicPileup.java @@ -37,13 +37,25 @@ abstract public class BasicPileup implements Pileup { } public static String baseWithStrandPileupAsString( List reads, List offsets ) { + return baseWithStrandPileupAsString( reads, offsets, false ); + } + + public static String baseWithStrandPileupAsString( List reads, List offsets, boolean includeDeletions ) { StringBuilder bases = new StringBuilder(); for ( int i = 0; i < reads.size(); i++ ) { SAMRecord read = reads.get(i); int offset = offsets.get(i); - char base = (char) read.getReadBases()[offset]; + char base; + if ( offset == -1 ) { + if ( includeDeletions ) + base = 'D'; + else + continue; + } else { + base = (char) read.getReadBases()[offset]; + } base = Character.toUpperCase(base); if (read.getReadNegativeStrandFlag()) { @@ -57,11 +69,20 @@ abstract public class BasicPileup implements Pileup { } public static ArrayList basePileup( List reads, List offsets ) { + return basePileup( reads, offsets, false ); + } + + public static ArrayList basePileup( List reads, List offsets, boolean includeDeletions ) { ArrayList bases = new ArrayList(reads.size()); for ( int i = 0; i < reads.size(); i++ ) { SAMRecord read = reads.get(i); int offset = offsets.get(i); - bases.add(read.getReadBases()[offset]); + if ( offset == -1 ) { + if ( includeDeletions ) + bases.add((byte)'D'); + } else { + bases.add(read.getReadBases()[offset]); + } } return bases; } @@ -71,6 +92,9 @@ abstract public class BasicPileup implements Pileup { for ( int i = 0; i < reads.size(); i++ ) { SAMRecord read = reads.get(i); int offset = offsets.get(i); + // skip deletion sites + if ( offset == -1 ) + continue; byte qual = (byte)read.getBaseQualities()[offset]; quals.add(qual); } @@ -117,7 +141,7 @@ abstract public class BasicPileup implements Pileup { byte[] compressedQuals = (byte[]) read.getAttribute("SQ"); byte base2; - if (compressedQuals != null && compressedQuals.length == read.getReadLength()) { + if (offset != -1 && compressedQuals != null && compressedQuals.length == read.getReadLength()) { base2 = (byte) BaseUtils.baseIndexToSimpleBase(QualityUtils.compressedQualityToBaseIndex(compressedQuals[offset])); hasAtLeastOneSQField = true; } else { @@ -168,7 +192,7 @@ abstract public class BasicPileup implements Pileup { byte[] compressedQuals = (byte[]) read.getAttribute("SQ"); byte qual2; - if (compressedQuals != null) { + if (offset != -1 && compressedQuals != null) { qual2 = QualityUtils.probToQual(QualityUtils.compressedQualityToProb(compressedQuals[offset])); hasAtLeastOneSQField = true; } else { @@ -201,6 +225,8 @@ abstract public class BasicPileup implements Pileup { String bases = read.getReadString(); int offset = offsets.get(readIndex); + if ( offset == -1 ) + continue; int bestBaseIndex = BaseUtils.simpleBaseToBaseIndex(bases.charAt(offset)); @@ -261,7 +287,7 @@ abstract public class BasicPileup implements Pileup { { SAMRecord read = reads.get(i); Cigar cigar = read.getCigar(); - int offset = offsets.get(i); + int offset = offsets.get(i); String cigar_string = read.getCigarString(); if (! (cigar_string.contains("I") || cigar_string.contains("D"))) { indels[i] = "null"; continue; } diff --git a/java/src/org/broadinstitute/sting/utils/ReadBackedPileup.java b/java/src/org/broadinstitute/sting/utils/ReadBackedPileup.java index 3d5b8e320..60a2fecaf 100755 --- a/java/src/org/broadinstitute/sting/utils/ReadBackedPileup.java +++ b/java/src/org/broadinstitute/sting/utils/ReadBackedPileup.java @@ -17,6 +17,7 @@ public class ReadBackedPileup extends BasicPileup { char ref; List reads; List offsets; + boolean includeDeletions = false; public ReadBackedPileup(char ref, AlignmentContext context ) { this(context.getLocation(), ref, context.getReads(), context.getOffsets()); @@ -37,6 +38,8 @@ public class ReadBackedPileup extends BasicPileup { public List getReads() { return reads; } public List getOffsets() { return offsets; } + public void includeDeletionsInPileupString() { includeDeletions = true; } + public GenomeLoc getLocation() { return loc; } @@ -81,18 +84,21 @@ public class ReadBackedPileup extends BasicPileup { public String getBasePileupAsCountsString() { String bases = basePileupAsString(reads, offsets); - int[] counts = new int[4]; - for (int i = 0; i < reads.size(); i++) - { - char base = Character.toUpperCase((char)(reads.get(i).getReadBases()[offsets.get(i)])); - if (BaseUtils.simpleBaseToBaseIndex(base) == -1) { continue; } - counts[BaseUtils.simpleBaseToBaseIndex(base)]++; - } - return String.format("A[%d] C[%d] G[%d] T[%d]", - counts[0], - counts[1], - counts[2], - counts[3]); + int[] counts = new int[4]; + for (int i = 0; i < reads.size(); i++) + { + // skip deletion sites + if ( offsets.get(i) == -1 ) + continue; + char base = Character.toUpperCase((char)(reads.get(i).getReadBases()[offsets.get(i)])); + if (BaseUtils.simpleBaseToBaseIndex(base) == -1) { continue; } + counts[BaseUtils.simpleBaseToBaseIndex(base)]++; + } + return String.format("A[%d] C[%d] G[%d] T[%d]", + counts[0], + counts[1], + counts[2], + counts[3]); } public String getProbDistPileup() {