Fix for monsterous problems with solid data -- now can dynamically expand recalibration tables on the fly as reads declare additional read groups -- use assumeFaultyHeader flag

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1167 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-07-03 17:15:49 +00:00
parent bcda66d2db
commit cf1854b339
2 changed files with 35 additions and 6 deletions

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.BaseUtils;
import org.apache.log4j.Logger;
import javax.management.RuntimeErrorException;
import java.util.*;
@ -9,19 +10,27 @@ import java.util.*;
public class CovariateCounter {
private boolean collapsePos = false;
private boolean collapseDinuc = false;
private boolean assumeFaultyHeader = false;
private HashMap<String, RecalDataManager> data = new HashMap<String, RecalDataManager>();
public CovariateCounter( Set<String> readGroups, boolean collapsePos, boolean collapseDinuc ) {
protected static Logger logger = Logger.getLogger(CovariateCounter.class);
public CovariateCounter( Set<String> readGroups, boolean collapsePos, boolean collapseDinuc, boolean assumeFaultyHeader ) {
this.collapsePos = collapsePos;
this.collapseDinuc = collapseDinuc;
this.assumeFaultyHeader = assumeFaultyHeader;
for (String readGroup : readGroups ) {
RecalDataManager manager = new RecalDataManager(readGroup, ! collapsePos, ! collapseDinuc );
RecalDataManager manager = makeManager(readGroup);
data.put(readGroup, manager);
}
}
private RecalDataManager makeManager(final String readGroup) {
return new RecalDataManager(readGroup, ! collapsePos, ! collapseDinuc );
}
/**
* Returns the set of readGroup names we are counting covariates for
* @return
@ -60,6 +69,17 @@ public class CovariateCounter {
* @return
*/
public RecalData getRecalData(String readGroup, int pos, int qual, char prevBase, char base) {
if ( ! data.containsKey(readGroup) ) {
if ( assumeFaultyHeader ) {
logger.info(String.format("Found unexpected read group, but assuming the header was bad, so extending covariates with read group %s", readGroup));
RecalDataManager manager = makeManager(readGroup);
data.put(readGroup, manager);
} else {
throw new RuntimeException(String.format("Unexpected read group %s found, there's something wrong with your BAM file's header", readGroup));
}
}
byte[] cs = {(byte)prevBase, (byte)base};
String s = new String(cs);
return data.get(readGroup).expandingGetRecalData(pos, qual, s, true);

View File

@ -29,6 +29,9 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
public List<String> platforms = Collections.singletonList("*");
//public List<String> platforms = Collections.singletonList("ILLUMINA");
@Argument(fullName="assumeFaultyHeader", required=false, doc="")
public boolean assumeFaultyHeader = false;
//@Argument(fullName="collapsePos", shortName="collapsePos", required=false, doc="")
public boolean collapsePos = false;
@ -55,7 +58,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
readGroups.add(readGroup.getReadGroupId());
}
covariateCounter = new CovariateCounter(readGroups, collapsePos, collapseDinuc);
covariateCounter = new CovariateCounter(readGroups, collapsePos, collapseDinuc, assumeFaultyHeader);
logger.info(String.format("Created recalibration data collectors for %d read group(s)", covariateCounter.getNReadGroups()));
}
@ -88,11 +91,17 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
throw new RuntimeException("Expectedly long read, please increase maxium read len with maxReadLen parameter: " + read.format());
}
SAMReadGroupRecord readGroup = read.getHeader().getReadGroup((String)read.getAttribute("RG"));
final String readGroupString = ((String)read.getAttribute("RG"));
SAMReadGroupRecord readGroup = read.getHeader().getReadGroup(readGroupString);
if ( readGroupString == null ) {
throw new RuntimeException("No read group annotation found for read " + read.format());
}
if ((read.getMappingQuality() >= MIN_MAPPING_QUALITY && isSupportedReadGroup(readGroup) )) {
int offset = offsets.get(i);
if ( offset > 0 && offset < (read.getReadLength() - 1) ) { // skip first and last bases because they suck and they don't have a dinuc count
counted_bases += covariateCounter.updateDataFromRead(readGroup.getReadGroupId(), read, offset, ref);
counted_bases += covariateCounter.updateDataFromRead(readGroupString, read, offset, ref);
}
}
}
@ -115,7 +124,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
private boolean isSupportedReadGroup( SAMReadGroupRecord readGroup ) {
for( String platform: platforms ) {
platform = platform.trim();
if( platform.equals("*") ||
if( platform.equals("*") || readGroup == null ||
readGroup.getAttribute("PL") == null ||
readGroup.getAttribute("PL").toString().equalsIgnoreCase(platform) )
return true;