Overwhelming evidence that maxQ = 50 is now a better default than maxQ = 40 in the base quality score recalibrator, especially when combined with dbsnp build 132. Also, added option in ProduceBeagleInputWalker for Beagle-ing chromosome X calls with male samples which sets the genotype likelihood for the AB allele to zero for those samples.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4731 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
ca70ed611c
commit
ed08899abc
|
|
@ -67,7 +67,7 @@ class AnalyzeCovariatesCLP extends CommandLineProgram {
|
|||
@Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false)
|
||||
private int NUM_READ_GROUPS_TO_PROCESS = -1; // -1 means process all read groups
|
||||
@Argument(fullName="max_quality_score", shortName="maxQ", required = false, doc="The integer value at which to cap the quality scores, default is 40")
|
||||
private int MAX_QUALITY_SCORE = 40;
|
||||
private int MAX_QUALITY_SCORE = 50;
|
||||
@Argument(fullName="max_histogram_value", shortName="maxHist", required = false, doc="If supplied, this value will be the max value of the histogram plots")
|
||||
private int MAX_HISTOGRAM_VALUE = 0;
|
||||
|
||||
|
|
|
|||
|
|
@ -71,6 +71,8 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
|||
public double bootstrap = 0.0;
|
||||
@Argument(fullName = "bootstrap_vcf",shortName = "bvcf", doc = "Output a VCF with the records used for bootstrapping filtered out", required = false)
|
||||
VCFWriter bootstrapVCFOutput = null;
|
||||
@Argument(fullName = "checkIsMaleOnChrX", shortName = "checkIsMaleOnChrX", doc = "Set to true when Beagle-ing chrX and want to ensure male samples don't have heterozygous calls.", required = false)
|
||||
public boolean CHECK_IS_MALE_ON_CHR_X = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "variant_genotype_ptrue", shortName = "varp", doc = "Flat probability prior to assign to variant (not validation) genotypes. Does not override GL field.", required = false)
|
||||
|
|
@ -185,6 +187,10 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
|||
Map<String,Genotype> otherGenotypes = goodSite(otherVC) ? otherVC.getGenotypes() : null;
|
||||
boolean isValidation;
|
||||
for ( String sample : samples ) {
|
||||
boolean isMaleOnChrX = false;
|
||||
if( CHECK_IS_MALE_ON_CHR_X && getToolkit().getSampleById(sample).isMale() ) {
|
||||
isMaleOnChrX = true;
|
||||
}
|
||||
Genotype genotype;
|
||||
// use sample as key into genotypes structure
|
||||
if ( preferredGenotypes.keySet().contains(sample) ) {
|
||||
|
|
@ -202,6 +208,9 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
|||
*/
|
||||
if ( (isValidation && prior < 0.0) || genotype.isCalled() && genotype.hasLikelihoods()) {
|
||||
double[] likeArray = genotype.getLikelihoods().getAsVector();
|
||||
if( isMaleOnChrX ) {
|
||||
likeArray[1] = -255;
|
||||
}
|
||||
double[] normalizedLikelihoods = MathUtils.normalizeFromLog10(likeArray);
|
||||
// see if we need to randomly mask out genotype in this position.
|
||||
Double d = generator.nextDouble();
|
||||
|
|
@ -212,7 +221,11 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
else {
|
||||
// we are masking out this genotype
|
||||
beagleWriter.print("0.33 0.33 0.33 ");
|
||||
if( isMaleOnChrX ) {
|
||||
beagleWriter.print("0.5 0.0 0.5 ");
|
||||
} else {
|
||||
beagleWriter.print("0.33 0.33 0.33 ");
|
||||
}
|
||||
}
|
||||
|
||||
if (beagleGenotypesWriter != null) {
|
||||
|
|
@ -236,7 +249,11 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
|||
else if (genotype.isHet()) { AB = prior; }
|
||||
else if (genotype.isHomVar()) { BB = prior; }
|
||||
|
||||
beagleWriter.printf("%.2f %.2f %.2f ", AA, AB, BB);
|
||||
if( isMaleOnChrX ) {
|
||||
beagleWriter.printf("%.2f %.2f %.2f ", AA, 0.0, BB);
|
||||
} else {
|
||||
beagleWriter.printf("%.2f %.2f %.2f ", AA, AB, BB);
|
||||
}
|
||||
|
||||
if (beagleGenotypesWriter != null) {
|
||||
char a = genotype.getAllele(0).toString().charAt(0);
|
||||
|
|
@ -246,7 +263,11 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
else {
|
||||
beagleWriter.print("0.33 0.33 0.33 "); // write 1/3 likelihoods for uncalled genotypes.
|
||||
if( isMaleOnChrX ) {
|
||||
beagleWriter.print("0.5 0.0 0.5 ");
|
||||
} else {
|
||||
beagleWriter.print("0.33 0.33 0.33 ");
|
||||
} // write 1/3 likelihoods for uncalled genotypes.
|
||||
if (beagleGenotypesWriter != null)
|
||||
beagleGenotypesWriter.print(". . ");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -97,7 +97,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
@Argument(fullName="smoothing", shortName="sm", required = false, doc="Number of imaginary counts to add to each bin in order to smooth out bins with few data points, default=1")
|
||||
private int SMOOTHING = 1;
|
||||
@Argument(fullName="max_quality_score", shortName="maxQ", required = false, doc="The integer value at which to cap the quality scores, default=40")
|
||||
private int MAX_QUALITY_SCORE = 40;
|
||||
private int MAX_QUALITY_SCORE = 50;
|
||||
|
||||
/////////////////////////////
|
||||
// Debugging-only Arguments
|
||||
|
|
|
|||
|
|
@ -54,9 +54,9 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
public void testTableRecalibrator1() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "da79dcc74336a6333f75aeb231590436" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "6c5ac8f71a77f09eec3e38932ba274cf");
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "4d24b4683a9b20e93837da75fff2489f");
|
||||
e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "b26a07cc3d3f6a607e786ebcc2c9c8ac" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "504152c1a94d6998af9785bcbddbb3d6" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "165e93b7713d099beb5919b595f41321" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
@ -278,7 +278,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testTableRecalibratorNoReadGroups() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "a2d7b780052fe31b0d37fc6462ab58ca" );
|
||||
e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "528f8d8812e72296d427af795d94eccc" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
@ -333,7 +333,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testTableRecalibratorNoIndex() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "4ae0b3d4e2f6a7c1c60ae64944afff5e" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "bd29f127ac19d8eb2108394fa2a889ce" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
|
|||
Loading…
Reference in New Issue