diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounter.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounter.java index da9ea62de..3d9214bc0 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounter.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounter.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.BaseUtils; +import org.apache.log4j.Logger; import javax.management.RuntimeErrorException; import java.util.*; @@ -9,19 +10,27 @@ import java.util.*; public class CovariateCounter { private boolean collapsePos = false; private boolean collapseDinuc = false; + private boolean assumeFaultyHeader = false; private HashMap data = new HashMap(); - public CovariateCounter( Set readGroups, boolean collapsePos, boolean collapseDinuc ) { + protected static Logger logger = Logger.getLogger(CovariateCounter.class); + + public CovariateCounter( Set readGroups, boolean collapsePos, boolean collapseDinuc, boolean assumeFaultyHeader ) { this.collapsePos = collapsePos; this.collapseDinuc = collapseDinuc; + this.assumeFaultyHeader = assumeFaultyHeader; for (String readGroup : readGroups ) { - RecalDataManager manager = new RecalDataManager(readGroup, ! collapsePos, ! collapseDinuc ); + RecalDataManager manager = makeManager(readGroup); data.put(readGroup, manager); } } + private RecalDataManager makeManager(final String readGroup) { + return new RecalDataManager(readGroup, ! collapsePos, ! collapseDinuc ); + } + /** * Returns the set of readGroup names we are counting covariates for * @return @@ -60,6 +69,17 @@ public class CovariateCounter { * @return */ public RecalData getRecalData(String readGroup, int pos, int qual, char prevBase, char base) { + if ( ! data.containsKey(readGroup) ) { + if ( assumeFaultyHeader ) { + logger.info(String.format("Found unexpected read group, but assuming the header was bad, so extending covariates with read group %s", readGroup)); + RecalDataManager manager = makeManager(readGroup); + data.put(readGroup, manager); + } else { + throw new RuntimeException(String.format("Unexpected read group %s found, there's something wrong with your BAM file's header", readGroup)); + } + } + + byte[] cs = {(byte)prevBase, (byte)base}; String s = new String(cs); return data.get(readGroup).expandingGetRecalData(pos, qual, s, true); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java index a2b41b6a2..d6cf705e3 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java @@ -29,6 +29,9 @@ public class CovariateCounterWalker extends LocusWalker { public List platforms = Collections.singletonList("*"); //public List platforms = Collections.singletonList("ILLUMINA"); + @Argument(fullName="assumeFaultyHeader", required=false, doc="") + public boolean assumeFaultyHeader = false; + //@Argument(fullName="collapsePos", shortName="collapsePos", required=false, doc="") public boolean collapsePos = false; @@ -55,7 +58,7 @@ public class CovariateCounterWalker extends LocusWalker { readGroups.add(readGroup.getReadGroupId()); } - covariateCounter = new CovariateCounter(readGroups, collapsePos, collapseDinuc); + covariateCounter = new CovariateCounter(readGroups, collapsePos, collapseDinuc, assumeFaultyHeader); logger.info(String.format("Created recalibration data collectors for %d read group(s)", covariateCounter.getNReadGroups())); } @@ -88,11 +91,17 @@ public class CovariateCounterWalker extends LocusWalker { throw new RuntimeException("Expectedly long read, please increase maxium read len with maxReadLen parameter: " + read.format()); } - SAMReadGroupRecord readGroup = read.getHeader().getReadGroup((String)read.getAttribute("RG")); + final String readGroupString = ((String)read.getAttribute("RG")); + SAMReadGroupRecord readGroup = read.getHeader().getReadGroup(readGroupString); + + if ( readGroupString == null ) { + throw new RuntimeException("No read group annotation found for read " + read.format()); + } + if ((read.getMappingQuality() >= MIN_MAPPING_QUALITY && isSupportedReadGroup(readGroup) )) { int offset = offsets.get(i); if ( offset > 0 && offset < (read.getReadLength() - 1) ) { // skip first and last bases because they suck and they don't have a dinuc count - counted_bases += covariateCounter.updateDataFromRead(readGroup.getReadGroupId(), read, offset, ref); + counted_bases += covariateCounter.updateDataFromRead(readGroupString, read, offset, ref); } } } @@ -115,7 +124,7 @@ public class CovariateCounterWalker extends LocusWalker { private boolean isSupportedReadGroup( SAMReadGroupRecord readGroup ) { for( String platform: platforms ) { platform = platform.trim(); - if( platform.equals("*") || + if( platform.equals("*") || readGroup == null || readGroup.getAttribute("PL") == null || readGroup.getAttribute("PL").toString().equalsIgnoreCase(platform) ) return true;