From 487fb2afb43693f6cb3c5fa52d5c98eff7451ddf Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 9 Jan 2013 15:30:46 -0500 Subject: [PATCH 1/5] Bug fix for the case of overlapping assembled and partially-assembled events created by the HC. Unfortunately the symbolic allele can't be combined with the indel allele because the reference basis will change. --- .../sting/gatk/walkers/haplotypecaller/GenotypingEngine.java | 4 ++-- .../variant/variantcontext/VariantContextUtils.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index 7c25ab551..42f5e1455 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -92,6 +92,7 @@ public class GenotypingEngine { cleanUpSymbolicUnassembledEvents( haplotypes ); if( !in_GGA_mode && samples.size() >= 10 ) { // if not in GGA mode and have at least 10 samples try to create MNP and complex events by looking at LD structure mergeConsecutiveEventsBasedOnLD( haplotypes, samples, haplotypeReadMap, startPosKeySet, ref, refLoc ); + cleanUpSymbolicUnassembledEvents( haplotypes ); // the newly created merged events could be overlapping the unassembled events } if( in_GGA_mode ) { for( final VariantContext compVC : activeAllelesToGenotype ) { @@ -261,7 +262,6 @@ public class GenotypingEngine { return returnMap; } - protected static void cleanUpSymbolicUnassembledEvents( final List haplotypes ) { final ArrayList haplotypesToRemove = new ArrayList(); for( final Haplotype h : haplotypes ) { @@ -269,7 +269,7 @@ public class GenotypingEngine { if( vc.isSymbolic() ) { for( final Haplotype h2 : haplotypes ) { for( final VariantContext vc2 : h2.getEventMap().values() ) { - if( vc.getStart() == vc2.getStart() && vc2.isIndel() ) { + if( vc.getStart() == vc2.getStart() && (vc2.isIndel() || vc2.isMNP()) ) { // unfortunately symbolic alleles can't currently be combined with non-point events haplotypesToRemove.add(h); break; } diff --git a/public/java/src/org/broadinstitute/variant/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/variant/variantcontext/VariantContextUtils.java index 4d1e70340..2d74a1a4a 100755 --- a/public/java/src/org/broadinstitute/variant/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/VariantContextUtils.java @@ -710,7 +710,7 @@ public class VariantContextUtils { for ( VariantContext vc : VCs ) { // look at previous variant contexts of different type. If: - // a) otherVC has alleles which are subset of vc, remove otherVC from its list and add otherVC to vc's list + // a) otherVC has alleles which are subset of vc, remove otherVC from its list and add otherVC to vc's list // b) vc has alleles which are subset of otherVC. Then, add vc to otherVC's type list (rather, do nothing since vc will be added automatically to its list) // c) neither: do nothing, just add vc to its own list boolean addtoOwnList = true; From 1a18947abfa37e8df106e6ffd485c765c004b28d Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 9 Jan 2013 15:54:02 -0500 Subject: [PATCH 2/5] Adding new command line argument requested on the forum to control the maximum number of haplotypes that are sent forward for genotyping. In the presence of a large degree of heterozygosity the current algorithm breaks down and so this argument would need to be increased. --- .../gatk/walkers/haplotypecaller/HaplotypeCaller.java | 7 ++++++- .../haplotypecaller/LikelihoodCalculationEngine.java | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index cde089b34..acc76815e 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -136,6 +136,10 @@ public class HaplotypeCaller extends ActiveRegionWalker implem @Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Flat gap continuation penalty for use in the Pair HMM", required = false) protected int gcpHMM = 10; + @Advanced + @Argument(fullName="maxNumHaplotypesInPopulation", shortName="maxNumHaplotypesInPopulation", doc="Maximum number of haplotypes to consider for your population. This number will probably need to be increased when calling organisms with high heterozygosity.", required = false) + protected int maxNumHaplotypesInPopulation = 13; + @Advanced @Argument(fullName="minKmer", shortName="minKmer", doc="Minimum kmer length to use in the assembly graph", required = false) protected int minKmer = 11; @@ -414,7 +418,8 @@ public class HaplotypeCaller extends ActiveRegionWalker implem final Map> perSampleFilteredReadList = splitReadsBySample( filteredReads ); // subset down to only the best haplotypes to be genotyped in all samples ( in GGA mode use all discovered haplotypes ) - final ArrayList bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? likelihoodCalculationEngine.selectBestHaplotypes( haplotypes, stratifiedReadMap ) : haplotypes ); + final ArrayList bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? + likelihoodCalculationEngine.selectBestHaplotypes( haplotypes, stratifiedReadMap, maxNumHaplotypesInPopulation ) : haplotypes ); for( final VariantContext call : genotypingEngine.assignGenotypeLikelihoods( UG_engine, bestHaplotypes, diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index e416b489b..6ca1ec1e8 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -273,7 +273,7 @@ public class LikelihoodCalculationEngine { @Requires({"haplotypes.size() > 0"}) @Ensures({"result.size() <= haplotypes.size()"}) - public ArrayList selectBestHaplotypes( final ArrayList haplotypes, final Map stratifiedReadMap ) { + public ArrayList selectBestHaplotypes( final ArrayList haplotypes, final Map stratifiedReadMap, final int maxNumHaplotypesInPopulation ) { final int numHaplotypes = haplotypes.size(); final Set sampleKeySet = stratifiedReadMap.keySet(); @@ -287,7 +287,7 @@ public class LikelihoodCalculationEngine { int hap1 = 0; int hap2 = 0; //double bestElement = Double.NEGATIVE_INFINITY; - final int maxChosenHaplotypes = Math.min( 13, sampleKeySet.size() * 2 + 1 ); + final int maxChosenHaplotypes = Math.min( maxNumHaplotypesInPopulation, sampleKeySet.size() * 2 + 1 ); while( bestHaplotypesIndexList.size() < maxChosenHaplotypes ) { double maxElement = Double.NEGATIVE_INFINITY; for( int iii = 0; iii < numHaplotypes; iii++ ) { From 4a8466783a30f8761f2a6837aaf1d6abe0d9e676 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 7 Jan 2013 14:46:28 -0500 Subject: [PATCH 3/5] License Parser and update all license scripts * Implemented a script that parses and replaces the license (to be used by git on every commit) * Implemented a shell script that makes use of the license parser to add the license to all java files in the repo GSA-685 GSATDG-6 GSATDG-16 --- licensing/private_license.txt | 7 +++++++ licensing/protected_license.txt | 7 +++++++ licensing/public_license.txt | 7 +++++++ 3 files changed, 21 insertions(+) create mode 100644 licensing/private_license.txt create mode 100644 licensing/protected_license.txt create mode 100644 licensing/public_license.txt diff --git a/licensing/private_license.txt b/licensing/private_license.txt new file mode 100644 index 000000000..e5609db65 --- /dev/null +++ b/licensing/private_license.txt @@ -0,0 +1,7 @@ +PRIVATE PRIVATE PRIVATE + +This is a test license for the GATK +all files will abide to it +one license to rule them all + +PRIVATE PRIVATE PRIVATE diff --git a/licensing/protected_license.txt b/licensing/protected_license.txt new file mode 100644 index 000000000..d3966e17d --- /dev/null +++ b/licensing/protected_license.txt @@ -0,0 +1,7 @@ +PROTECTED PROTECTED PROTECTED + +This is a test license for the GATK +all files will abide to it +one license to rule them all + +PROTECTED PROTECTED PROTECTED diff --git a/licensing/public_license.txt b/licensing/public_license.txt new file mode 100644 index 000000000..84e93af8e --- /dev/null +++ b/licensing/public_license.txt @@ -0,0 +1,7 @@ +PUBLIC PUBLIC PUBLIC + +This is a test license for the GATK +all files will abide to it +one license to rule them all + +PUBLIC PUBLIC PUBLIC From 3e52ce5fa8d6f8ec80c73b65959b6604f035bcef Mon Sep 17 00:00:00 2001 From: Joel Thibault Date: Thu, 10 Jan 2013 11:44:24 -0500 Subject: [PATCH 4/5] Remove DepthOfCoverage.java because it is no longer public - Move Pileup.java and PrintReads.java to their new homes --- public/packages/GATKEngine.xml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/public/packages/GATKEngine.xml b/public/packages/GATKEngine.xml index 27d2afa47..42b3a4d6e 100644 --- a/public/packages/GATKEngine.xml +++ b/public/packages/GATKEngine.xml @@ -53,11 +53,10 @@ - - - + + From f801cb3be51ee9b0e01c57cf9a55b2515e2a5d41 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 10 Jan 2013 13:46:49 -0500 Subject: [PATCH 5/5] Updating Queue maven version to 0.0.2 - After changes to the repositories, we are making sure that cmi-queueext is getting the right file. --- build.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.xml b/build.xml index 3aead62d6..2dfff0cc2 100644 --- a/build.xml +++ b/build.xml @@ -991,7 +991,7 @@ - +