diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java index 25e38e078..0917338fb 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java @@ -48,8 +48,9 @@ public class AlleleBalance extends StandardVariantAnnotation { int aCount = Utils.countOccurrences(a, bases); int bCount = Utils.countOccurrences(b, bases); - refCount += a == ref.getBase() ? aCount : bCount; - altCount += a == ref.getBase() ? bCount : aCount; + // weight the allele balance by genotype quality so that e.g. mis-called homs don't affect the ratio too much + refCount += genotype.getNegLog10PError() * (a == ref.getBase() ? aCount : bCount); + altCount += genotype.getNegLog10PError() * (a == ref.getBase() ? bCount : aCount); } // sanity check diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java index fe8b9d471..77944decb 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java @@ -50,11 +50,13 @@ public abstract class GenotypeCalculationModel implements Cloneable { * @param logger logger * @param UAC unified arg collection * @param outputFormat output format + * @param verboseWriter verbose writer */ protected void initialize(Set samples, Logger logger, UnifiedArgumentCollection UAC, - GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat) { + GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat, + PrintWriter verboseWriter) { this.samples = new TreeSet(samples); this.logger = logger; baseModel = UAC.baseModel; @@ -66,24 +68,14 @@ public abstract class GenotypeCalculationModel implements Cloneable { POOL_SIZE = UAC.POOLSIZE; CONFIDENCE_THRESHOLD = UAC.CONFIDENCE_THRESHOLD; MINIMUM_ALLELE_FREQUENCY = UAC.MINIMUM_ALLELE_FREQUENCY; - if ( UAC.VERBOSE != null ) { - try { - verboseWriter = new PrintWriter(UAC.VERBOSE); - initializeVerboseWriter(verboseWriter); - } catch (FileNotFoundException e) { - throw new StingException("Could not open file " + UAC.VERBOSE + " for writing"); - } - } REPORT_SLOD = ! UAC.NO_SLOD; + this.verboseWriter = verboseWriter; + if ( verboseWriter != null ) + initializeVerboseWriter(verboseWriter); } protected void initializeVerboseWriter(PrintWriter writer) { }; - public void close() { - if ( verboseWriter != null ) - verboseWriter.close(); - } - /** * Must be overridden by concrete subclasses * @param tracker rod data diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModelFactory.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModelFactory.java index 2d0eaec44..ca934ea0c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModelFactory.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModelFactory.java @@ -30,6 +30,7 @@ import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; import org.apache.log4j.Logger; import java.util.Set; +import java.io.PrintWriter; public class GenotypeCalculationModelFactory { @@ -55,7 +56,8 @@ public class GenotypeCalculationModelFactory { public static GenotypeCalculationModel makeGenotypeCalculation(Set samples, Logger logger, UnifiedArgumentCollection UAC, - GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat) { + GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat, + PrintWriter verboseWriter) { GenotypeCalculationModel gcm; switch ( UAC.genotypeModel ) { case EM_POINT_ESTIMATE: @@ -70,7 +72,7 @@ public class GenotypeCalculationModelFactory { default: throw new RuntimeException("Unexpected GenotypeCalculationModel " + UAC.genotypeModel); } - gcm.initialize(samples, logger, UAC, outputFormat); + gcm.initialize(samples, logger, UAC, outputFormat, verboseWriter); return gcm; } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 175bce691..6fc1c3f97 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -50,9 +50,6 @@ public class UnifiedArgumentCollection { @Argument(fullName = "output_all_callable_bases", shortName = "all_bases", doc = "Should we output all callable bases?", required = false) public boolean ALL_BASES = false; - @Argument(fullName = "verbose_mode", shortName = "verbose", doc = "File to print all of the annotated and detailed debugging output", required = false) - public String VERBOSE = null; - @Argument(fullName = "noSLOD", shortName = "nsl", doc = "If provided, we will not calculate the SLOD", required = false) public boolean NO_SLOD = false; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 626ff2f65..ee5af6f3a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -40,6 +40,8 @@ import org.broadinstitute.sting.utils.genotype.glf.GLFGenotypeWriter; import org.broadinstitute.sting.utils.genotype.vcf.*; import java.util.*; +import java.io.PrintWriter; +import java.io.FileNotFoundException; /** @@ -56,11 +58,18 @@ public class UnifiedGenotyper extends LocusWalker gcm = new ThreadLocal(); // samples in input - private Set samples; + private Set samples = new HashSet(); /** Enable deletions in the pileup **/ @@ -75,7 +84,6 @@ public class UnifiedGenotyper extends LocusWalker 1 ) { + // no ASSUME_SINGLE_SAMPLE because the IO system doesn't know how to get the sample name + if ( UAC.ASSUME_SINGLE_SAMPLE != null ) + throw new IllegalArgumentException("For technical reasons, the ASSUME_SINGLE_SAMPLE argument cannot be used with multiple threads"); + // no VERBOSE because we'd need to deal with parallelizing the writing + if ( VERBOSE != null ) + throw new IllegalArgumentException("For technical reasons, the VERBOSE argument cannot be used with multiple threads"); + } - // print them out for debugging (need separate loop to ensure uniqueness) - // for ( String sample : samples ) - // logger.debug("SAMPLE: " + sample); + // get all of the unique sample names - unless we're in POOLED mode, in which case we ignore the sample names + if ( UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) { + // if we're supposed to assume a single sample, do so + if ( UAC.ASSUME_SINGLE_SAMPLE != null ) + samples.add(UAC.ASSUME_SINGLE_SAMPLE); + else + samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); + // for ( String sample : samples ) + // logger.debug("SAMPLE: " + sample); + } + + // initialize the verbose writer + if ( VERBOSE != null ) { + try { + verboseWriter = new PrintWriter(VERBOSE); + } catch (FileNotFoundException e) { + throw new StingException("Could not open file " + VERBOSE + " for writing"); + } + } // *** If we were called by another walker, then we don't *** // *** want to do any of the other initialization steps. *** if ( writer == null ) @@ -118,16 +145,8 @@ public class UnifiedGenotyper extends LocusWalker headerInfo = getHeaderInfo(); - // initialize the header - GenotypeWriterFactory.writeHeader(writer, GenomeAnalysisEngine.instance.getSAMFileHeader(), samples, headerInfo); + GenotypeWriterFactory.writeHeader(writer, GenomeAnalysisEngine.instance.getSAMFileHeader(), samples, getHeaderInfo()); } private Set getHeaderInfo() { @@ -177,20 +196,22 @@ public class UnifiedGenotyper extends LocusWalker> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) { - GenotypeWriterFactory.GENOTYPE_FORMAT format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF; - if(writer != null) { - if(writer instanceof VCFGenotypeWriter) - format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF; - else if(writer instanceof GLFGenotypeWriter) - format = GenotypeWriterFactory.GENOTYPE_FORMAT.GLF; - else if(writer instanceof GeliGenotypeWriter) - format = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI; - else - throw new StingException("Unsupported genotype format: " + writer.getClass().getName()); + + // initialize the GenotypeCalculationModel for this thread if that hasn't been done yet + if ( gcm.get() == null ) { + GenotypeWriterFactory.GENOTYPE_FORMAT format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF; + if ( writer != null ) { + if ( writer instanceof VCFGenotypeWriter ) + format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF; + else if ( writer instanceof GLFGenotypeWriter ) + format = GenotypeWriterFactory.GENOTYPE_FORMAT.GLF; + else if ( writer instanceof GeliGenotypeWriter ) + format = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI; + else + throw new StingException("Unsupported genotype format: " + writer.getClass().getName()); + } + gcm.set(GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, format, verboseWriter)); } - - if(gcm.get() == null) - gcm.set(GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, format)); char ref = Character.toUpperCase(refContext.getBase()); if ( !BaseUtils.isRegularBase(ref) ) @@ -281,23 +302,9 @@ public class UnifiedGenotyper extends LocusWalker e = new HashMap(); - e.put( "-genotype", "c984952b91193d9550066f9514434183" ); - e.put( "-all_bases", "0062942f2323ea0f2c9d3a3739a28d20" ); - e.put( "--min_base_quality_score 10", "8a645a76d85e8a6edda6acdf8071ee6b" ); - e.put( "--min_mapping_quality_score 10", "cf6c38de9872bdbf5da1a22d8c962ebe" ); - e.put( "--max_mismatches_in_40bp_window 5", "e21a319eb0efbe11c3c1eeea7c9c0144" ); + e.put( "-genotype", "9f7886f0f63de56ebcd187c22725b1f7" ); + e.put( "-all_bases", "17f08368b48ebb5e18063da47f689e4e" ); + e.put( "--min_base_quality_score 10", "cdac609651438aa8e74cb859aa04e2f0" ); + e.put( "--min_mapping_quality_score 10", "ee1afd51eda0d005e5b3e908af63eb27" ); + e.put( "--max_mismatches_in_40bp_window 5", "b9bec75e1aa4c3f45e0a88ee74b41321" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -103,7 +103,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testConfidence() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -bm empirical -gm JOINT_ESTIMATE -confidence 10 ", 1, - Arrays.asList("6823ce46547cf9693cda040dcbd4c9c1")); + Arrays.asList("022cf14445f7cd6beb04957c8fea9ae5")); executeTest("testConfidence", spec); }