Overwhelming evidence that maxQ = 50 is now a better default than maxQ = 40 in the base quality score recalibrator, especially when combined with dbsnp build 132. Also, added option in ProduceBeagleInputWalker for Beagle-ing chromosome X calls with male samples which sets the genotype likelihood for the AB allele to zero for those samples.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4731 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2010-11-24 21:32:26 +00:00
parent ca70ed611c
commit ed08899abc
4 changed files with 30 additions and 9 deletions

View File

@ -67,7 +67,7 @@ class AnalyzeCovariatesCLP extends CommandLineProgram {
@Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false)
private int NUM_READ_GROUPS_TO_PROCESS = -1; // -1 means process all read groups
@Argument(fullName="max_quality_score", shortName="maxQ", required = false, doc="The integer value at which to cap the quality scores, default is 40")
private int MAX_QUALITY_SCORE = 40;
private int MAX_QUALITY_SCORE = 50;
@Argument(fullName="max_histogram_value", shortName="maxHist", required = false, doc="If supplied, this value will be the max value of the histogram plots")
private int MAX_HISTOGRAM_VALUE = 0;

View File

@ -71,6 +71,8 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
public double bootstrap = 0.0;
@Argument(fullName = "bootstrap_vcf",shortName = "bvcf", doc = "Output a VCF with the records used for bootstrapping filtered out", required = false)
VCFWriter bootstrapVCFOutput = null;
@Argument(fullName = "checkIsMaleOnChrX", shortName = "checkIsMaleOnChrX", doc = "Set to true when Beagle-ing chrX and want to ensure male samples don't have heterozygous calls.", required = false)
public boolean CHECK_IS_MALE_ON_CHR_X = false;
@Hidden
@Argument(fullName = "variant_genotype_ptrue", shortName = "varp", doc = "Flat probability prior to assign to variant (not validation) genotypes. Does not override GL field.", required = false)
@ -185,6 +187,10 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
Map<String,Genotype> otherGenotypes = goodSite(otherVC) ? otherVC.getGenotypes() : null;
boolean isValidation;
for ( String sample : samples ) {
boolean isMaleOnChrX = false;
if( CHECK_IS_MALE_ON_CHR_X && getToolkit().getSampleById(sample).isMale() ) {
isMaleOnChrX = true;
}
Genotype genotype;
// use sample as key into genotypes structure
if ( preferredGenotypes.keySet().contains(sample) ) {
@ -202,6 +208,9 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
*/
if ( (isValidation && prior < 0.0) || genotype.isCalled() && genotype.hasLikelihoods()) {
double[] likeArray = genotype.getLikelihoods().getAsVector();
if( isMaleOnChrX ) {
likeArray[1] = -255;
}
double[] normalizedLikelihoods = MathUtils.normalizeFromLog10(likeArray);
// see if we need to randomly mask out genotype in this position.
Double d = generator.nextDouble();
@ -212,7 +221,11 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
}
else {
// we are masking out this genotype
beagleWriter.print("0.33 0.33 0.33 ");
if( isMaleOnChrX ) {
beagleWriter.print("0.5 0.0 0.5 ");
} else {
beagleWriter.print("0.33 0.33 0.33 ");
}
}
if (beagleGenotypesWriter != null) {
@ -236,7 +249,11 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
else if (genotype.isHet()) { AB = prior; }
else if (genotype.isHomVar()) { BB = prior; }
beagleWriter.printf("%.2f %.2f %.2f ", AA, AB, BB);
if( isMaleOnChrX ) {
beagleWriter.printf("%.2f %.2f %.2f ", AA, 0.0, BB);
} else {
beagleWriter.printf("%.2f %.2f %.2f ", AA, AB, BB);
}
if (beagleGenotypesWriter != null) {
char a = genotype.getAllele(0).toString().charAt(0);
@ -246,7 +263,11 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
}
}
else {
beagleWriter.print("0.33 0.33 0.33 "); // write 1/3 likelihoods for uncalled genotypes.
if( isMaleOnChrX ) {
beagleWriter.print("0.5 0.0 0.5 ");
} else {
beagleWriter.print("0.33 0.33 0.33 ");
} // write 1/3 likelihoods for uncalled genotypes.
if (beagleGenotypesWriter != null)
beagleGenotypesWriter.print(". . ");
}

View File

@ -97,7 +97,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
@Argument(fullName="smoothing", shortName="sm", required = false, doc="Number of imaginary counts to add to each bin in order to smooth out bins with few data points, default=1")
private int SMOOTHING = 1;
@Argument(fullName="max_quality_score", shortName="maxQ", required = false, doc="The integer value at which to cap the quality scores, default=40")
private int MAX_QUALITY_SCORE = 40;
private int MAX_QUALITY_SCORE = 50;
/////////////////////////////
// Debugging-only Arguments

View File

@ -54,9 +54,9 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
public void testTableRecalibrator1() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "da79dcc74336a6333f75aeb231590436" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "6c5ac8f71a77f09eec3e38932ba274cf");
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "4d24b4683a9b20e93837da75fff2489f");
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "b26a07cc3d3f6a607e786ebcc2c9c8ac" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "504152c1a94d6998af9785bcbddbb3d6" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "165e93b7713d099beb5919b595f41321" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -278,7 +278,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testTableRecalibratorNoReadGroups() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "a2d7b780052fe31b0d37fc6462ab58ca" );
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "528f8d8812e72296d427af795d94eccc" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -333,7 +333,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testTableRecalibratorNoIndex() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "4ae0b3d4e2f6a7c1c60ae64944afff5e" );
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "bd29f127ac19d8eb2108394fa2a889ce" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();