Fix for monsterous problems with solid data -- now can dynamically expand recalibration tables on the fly as reads declare additional read groups -- use assumeFaultyHeader flag
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1167 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
bcda66d2db
commit
cf1854b339
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
import javax.management.RuntimeErrorException;
|
import javax.management.RuntimeErrorException;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -9,19 +10,27 @@ import java.util.*;
|
||||||
public class CovariateCounter {
|
public class CovariateCounter {
|
||||||
private boolean collapsePos = false;
|
private boolean collapsePos = false;
|
||||||
private boolean collapseDinuc = false;
|
private boolean collapseDinuc = false;
|
||||||
|
private boolean assumeFaultyHeader = false;
|
||||||
|
|
||||||
private HashMap<String, RecalDataManager> data = new HashMap<String, RecalDataManager>();
|
private HashMap<String, RecalDataManager> data = new HashMap<String, RecalDataManager>();
|
||||||
|
|
||||||
public CovariateCounter( Set<String> readGroups, boolean collapsePos, boolean collapseDinuc ) {
|
protected static Logger logger = Logger.getLogger(CovariateCounter.class);
|
||||||
|
|
||||||
|
public CovariateCounter( Set<String> readGroups, boolean collapsePos, boolean collapseDinuc, boolean assumeFaultyHeader ) {
|
||||||
this.collapsePos = collapsePos;
|
this.collapsePos = collapsePos;
|
||||||
this.collapseDinuc = collapseDinuc;
|
this.collapseDinuc = collapseDinuc;
|
||||||
|
this.assumeFaultyHeader = assumeFaultyHeader;
|
||||||
|
|
||||||
for (String readGroup : readGroups ) {
|
for (String readGroup : readGroups ) {
|
||||||
RecalDataManager manager = new RecalDataManager(readGroup, ! collapsePos, ! collapseDinuc );
|
RecalDataManager manager = makeManager(readGroup);
|
||||||
data.put(readGroup, manager);
|
data.put(readGroup, manager);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private RecalDataManager makeManager(final String readGroup) {
|
||||||
|
return new RecalDataManager(readGroup, ! collapsePos, ! collapseDinuc );
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the set of readGroup names we are counting covariates for
|
* Returns the set of readGroup names we are counting covariates for
|
||||||
* @return
|
* @return
|
||||||
|
|
@ -60,6 +69,17 @@ public class CovariateCounter {
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public RecalData getRecalData(String readGroup, int pos, int qual, char prevBase, char base) {
|
public RecalData getRecalData(String readGroup, int pos, int qual, char prevBase, char base) {
|
||||||
|
if ( ! data.containsKey(readGroup) ) {
|
||||||
|
if ( assumeFaultyHeader ) {
|
||||||
|
logger.info(String.format("Found unexpected read group, but assuming the header was bad, so extending covariates with read group %s", readGroup));
|
||||||
|
RecalDataManager manager = makeManager(readGroup);
|
||||||
|
data.put(readGroup, manager);
|
||||||
|
} else {
|
||||||
|
throw new RuntimeException(String.format("Unexpected read group %s found, there's something wrong with your BAM file's header", readGroup));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
byte[] cs = {(byte)prevBase, (byte)base};
|
byte[] cs = {(byte)prevBase, (byte)base};
|
||||||
String s = new String(cs);
|
String s = new String(cs);
|
||||||
return data.get(readGroup).expandingGetRecalData(pos, qual, s, true);
|
return data.get(readGroup).expandingGetRecalData(pos, qual, s, true);
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,9 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
||||||
public List<String> platforms = Collections.singletonList("*");
|
public List<String> platforms = Collections.singletonList("*");
|
||||||
//public List<String> platforms = Collections.singletonList("ILLUMINA");
|
//public List<String> platforms = Collections.singletonList("ILLUMINA");
|
||||||
|
|
||||||
|
@Argument(fullName="assumeFaultyHeader", required=false, doc="")
|
||||||
|
public boolean assumeFaultyHeader = false;
|
||||||
|
|
||||||
//@Argument(fullName="collapsePos", shortName="collapsePos", required=false, doc="")
|
//@Argument(fullName="collapsePos", shortName="collapsePos", required=false, doc="")
|
||||||
public boolean collapsePos = false;
|
public boolean collapsePos = false;
|
||||||
|
|
||||||
|
|
@ -55,7 +58,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
||||||
readGroups.add(readGroup.getReadGroupId());
|
readGroups.add(readGroup.getReadGroupId());
|
||||||
}
|
}
|
||||||
|
|
||||||
covariateCounter = new CovariateCounter(readGroups, collapsePos, collapseDinuc);
|
covariateCounter = new CovariateCounter(readGroups, collapsePos, collapseDinuc, assumeFaultyHeader);
|
||||||
logger.info(String.format("Created recalibration data collectors for %d read group(s)", covariateCounter.getNReadGroups()));
|
logger.info(String.format("Created recalibration data collectors for %d read group(s)", covariateCounter.getNReadGroups()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -88,11 +91,17 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
||||||
throw new RuntimeException("Expectedly long read, please increase maxium read len with maxReadLen parameter: " + read.format());
|
throw new RuntimeException("Expectedly long read, please increase maxium read len with maxReadLen parameter: " + read.format());
|
||||||
}
|
}
|
||||||
|
|
||||||
SAMReadGroupRecord readGroup = read.getHeader().getReadGroup((String)read.getAttribute("RG"));
|
final String readGroupString = ((String)read.getAttribute("RG"));
|
||||||
|
SAMReadGroupRecord readGroup = read.getHeader().getReadGroup(readGroupString);
|
||||||
|
|
||||||
|
if ( readGroupString == null ) {
|
||||||
|
throw new RuntimeException("No read group annotation found for read " + read.format());
|
||||||
|
}
|
||||||
|
|
||||||
if ((read.getMappingQuality() >= MIN_MAPPING_QUALITY && isSupportedReadGroup(readGroup) )) {
|
if ((read.getMappingQuality() >= MIN_MAPPING_QUALITY && isSupportedReadGroup(readGroup) )) {
|
||||||
int offset = offsets.get(i);
|
int offset = offsets.get(i);
|
||||||
if ( offset > 0 && offset < (read.getReadLength() - 1) ) { // skip first and last bases because they suck and they don't have a dinuc count
|
if ( offset > 0 && offset < (read.getReadLength() - 1) ) { // skip first and last bases because they suck and they don't have a dinuc count
|
||||||
counted_bases += covariateCounter.updateDataFromRead(readGroup.getReadGroupId(), read, offset, ref);
|
counted_bases += covariateCounter.updateDataFromRead(readGroupString, read, offset, ref);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -115,7 +124,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
||||||
private boolean isSupportedReadGroup( SAMReadGroupRecord readGroup ) {
|
private boolean isSupportedReadGroup( SAMReadGroupRecord readGroup ) {
|
||||||
for( String platform: platforms ) {
|
for( String platform: platforms ) {
|
||||||
platform = platform.trim();
|
platform = platform.trim();
|
||||||
if( platform.equals("*") ||
|
if( platform.equals("*") || readGroup == null ||
|
||||||
readGroup.getAttribute("PL") == null ||
|
readGroup.getAttribute("PL") == null ||
|
||||||
readGroup.getAttribute("PL").toString().equalsIgnoreCase(platform) )
|
readGroup.getAttribute("PL").toString().equalsIgnoreCase(platform) )
|
||||||
return true;
|
return true;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue