Fix for monsterous problems with solid data -- now can dynamically expand recalibration tables on the fly as reads declare additional read groups -- use assumeFaultyHeader flag
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1167 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
bcda66d2db
commit
cf1854b339
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration;
|
|||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import javax.management.RuntimeErrorException;
|
||||
import java.util.*;
|
||||
|
|
@ -9,19 +10,27 @@ import java.util.*;
|
|||
public class CovariateCounter {
|
||||
private boolean collapsePos = false;
|
||||
private boolean collapseDinuc = false;
|
||||
private boolean assumeFaultyHeader = false;
|
||||
|
||||
private HashMap<String, RecalDataManager> data = new HashMap<String, RecalDataManager>();
|
||||
|
||||
public CovariateCounter( Set<String> readGroups, boolean collapsePos, boolean collapseDinuc ) {
|
||||
protected static Logger logger = Logger.getLogger(CovariateCounter.class);
|
||||
|
||||
public CovariateCounter( Set<String> readGroups, boolean collapsePos, boolean collapseDinuc, boolean assumeFaultyHeader ) {
|
||||
this.collapsePos = collapsePos;
|
||||
this.collapseDinuc = collapseDinuc;
|
||||
this.assumeFaultyHeader = assumeFaultyHeader;
|
||||
|
||||
for (String readGroup : readGroups ) {
|
||||
RecalDataManager manager = new RecalDataManager(readGroup, ! collapsePos, ! collapseDinuc );
|
||||
RecalDataManager manager = makeManager(readGroup);
|
||||
data.put(readGroup, manager);
|
||||
}
|
||||
}
|
||||
|
||||
private RecalDataManager makeManager(final String readGroup) {
|
||||
return new RecalDataManager(readGroup, ! collapsePos, ! collapseDinuc );
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the set of readGroup names we are counting covariates for
|
||||
* @return
|
||||
|
|
@ -60,6 +69,17 @@ public class CovariateCounter {
|
|||
* @return
|
||||
*/
|
||||
public RecalData getRecalData(String readGroup, int pos, int qual, char prevBase, char base) {
|
||||
if ( ! data.containsKey(readGroup) ) {
|
||||
if ( assumeFaultyHeader ) {
|
||||
logger.info(String.format("Found unexpected read group, but assuming the header was bad, so extending covariates with read group %s", readGroup));
|
||||
RecalDataManager manager = makeManager(readGroup);
|
||||
data.put(readGroup, manager);
|
||||
} else {
|
||||
throw new RuntimeException(String.format("Unexpected read group %s found, there's something wrong with your BAM file's header", readGroup));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
byte[] cs = {(byte)prevBase, (byte)base};
|
||||
String s = new String(cs);
|
||||
return data.get(readGroup).expandingGetRecalData(pos, qual, s, true);
|
||||
|
|
|
|||
|
|
@ -29,6 +29,9 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
public List<String> platforms = Collections.singletonList("*");
|
||||
//public List<String> platforms = Collections.singletonList("ILLUMINA");
|
||||
|
||||
@Argument(fullName="assumeFaultyHeader", required=false, doc="")
|
||||
public boolean assumeFaultyHeader = false;
|
||||
|
||||
//@Argument(fullName="collapsePos", shortName="collapsePos", required=false, doc="")
|
||||
public boolean collapsePos = false;
|
||||
|
||||
|
|
@ -55,7 +58,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
readGroups.add(readGroup.getReadGroupId());
|
||||
}
|
||||
|
||||
covariateCounter = new CovariateCounter(readGroups, collapsePos, collapseDinuc);
|
||||
covariateCounter = new CovariateCounter(readGroups, collapsePos, collapseDinuc, assumeFaultyHeader);
|
||||
logger.info(String.format("Created recalibration data collectors for %d read group(s)", covariateCounter.getNReadGroups()));
|
||||
}
|
||||
|
||||
|
|
@ -88,11 +91,17 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
throw new RuntimeException("Expectedly long read, please increase maxium read len with maxReadLen parameter: " + read.format());
|
||||
}
|
||||
|
||||
SAMReadGroupRecord readGroup = read.getHeader().getReadGroup((String)read.getAttribute("RG"));
|
||||
final String readGroupString = ((String)read.getAttribute("RG"));
|
||||
SAMReadGroupRecord readGroup = read.getHeader().getReadGroup(readGroupString);
|
||||
|
||||
if ( readGroupString == null ) {
|
||||
throw new RuntimeException("No read group annotation found for read " + read.format());
|
||||
}
|
||||
|
||||
if ((read.getMappingQuality() >= MIN_MAPPING_QUALITY && isSupportedReadGroup(readGroup) )) {
|
||||
int offset = offsets.get(i);
|
||||
if ( offset > 0 && offset < (read.getReadLength() - 1) ) { // skip first and last bases because they suck and they don't have a dinuc count
|
||||
counted_bases += covariateCounter.updateDataFromRead(readGroup.getReadGroupId(), read, offset, ref);
|
||||
counted_bases += covariateCounter.updateDataFromRead(readGroupString, read, offset, ref);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -115,7 +124,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
private boolean isSupportedReadGroup( SAMReadGroupRecord readGroup ) {
|
||||
for( String platform: platforms ) {
|
||||
platform = platform.trim();
|
||||
if( platform.equals("*") ||
|
||||
if( platform.equals("*") || readGroup == null ||
|
||||
readGroup.getAttribute("PL") == null ||
|
||||
readGroup.getAttribute("PL").toString().equalsIgnoreCase(platform) )
|
||||
return true;
|
||||
|
|
|
|||
Loading…
Reference in New Issue